mirror of https://github.com/yt-dlp/yt-dlp.git synced 2026-01-12 01:41:26 +00:00

Compare commits

53 Commits

Author SHA1 Message Date
bashonly
27afb31edc [ie/tarangplus] Sanitize m3u8 URLs (#15502)
Fix 260ba3abba

Closes #15501
Authored by: bashonly
2026-01-06 05:44:30 +00:00
InvalidUsernameException
48b845a296 [ie/zdf] Support sister sites URLs (#15370)
Closes #13319
Authored by: InvalidUsernameException
2026-01-06 04:56:18 +00:00
clayote
cec1f1df79 Fix --parse-metadata when TO is a single field name (#14577)
Closes #14576
Authored by: clayote, bashonly

Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2026-01-05 03:19:30 +00:00
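A usage sketch of the fixed behavior, mirroring the README example this commit adds further down this page (URL is a placeholder):
# Copy the episode field to the title field (with FROM and TO as single fields)
$ yt-dlp --parse-metadata "episode:title" URL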
0x∅
ba499ab0dc [ie/croatian.film] Add extractor (#15468)
Closes #15464
Authored by: 0xvd
2026-01-04 17:43:47 +00:00
0x∅
5a481d65fa [ie/hotstar] Extract from new API (#15480)
Closes #15479
Authored by: 0xvd
2026-01-04 04:52:37 +00:00
Cédric Luthi
6ae9e95687 [ie/tv5unis] Fix extractors (#15477)
Closes #12662
Authored by: 0xced
2026-01-04 01:02:29 +00:00
pomtnp
9c393e3f62 [ie/tiktok] Extract save_count (#15054)
Closes #15053
Authored by: pomtnp
2026-01-03 21:48:42 +00:00
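A sketch of reading the new field (URL is a placeholder); save_count is documented as an output-template field in the README diff below:
# Print the number of times the video has been saved or bookmarked
$ yt-dlp --print save_count URL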
Emi
87a265d820 [ie/tumblr] Extract timestamp (#15462)
Authored by: alch-emi
2026-01-03 20:54:29 +00:00
doe1080
4d4c7e1c69 [utils] js_to_json: Prevent false positives for octals (#15474)
Authored by: doe1080
2026-01-03 20:53:16 +00:00
João Victor Fernandes Oliveira
0066de5b7e [ie/zoom] Extract recordings with start times (#15475)
Authored by: JV-Fernandes
2026-01-03 20:30:38 +00:00
Oliver Pfeiffer
5026548d65 [ie/bigo] Support --wait-for-video (#15463)
Authored by: olipfei
2026-01-03 00:20:59 +00:00
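A hedged usage sketch (URL is a placeholder; 60 is an illustrative retry interval in seconds):
# Wait for the scheduled Bigo stream to become available, retrying roughly every 60 seconds
$ yt-dlp --wait-for-video 60 URL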
0x∅
e15ca65874 [ie/twitch:videos] Raise error when channel is not found (#15458)
Closes #15450
Authored by: 0xvd
2026-01-03 00:17:38 +00:00
bashonly
3763d0d4ab [build] Improve nightly release check (#15455)
Authored by: bashonly
2026-01-02 16:02:58 +00:00
Subrat Lima
260ba3abba [ie/tarangplus] Add extractors (#13060)
Closes #13020
Authored by: subrat-lima
2026-01-02 00:15:25 +00:00
ptlydpr
878a41e283 [ie/pandatv] Add extractor (#13210)
Authored by: ptlydpr
2026-01-01 01:24:14 +01:00
bashonly
76c31a7a21 [ie/youtube] Fix comment subthreads extraction (#15448)
Fix d22436e5dc

Closes #15444
Authored by: bashonly
2025-12-31 09:56:26 +00:00
bashonly
ab3ff2d5dd [build] Harden CI/CD pipeline (#15387)
* NOTE: the release workflows' new handling of secrets
  may be a breaking change for forks that are using any secrets
  other than GPG_SIGNING_KEY or ARCHIVE_REPO_TOKEN.

  Previously, the release workflow would try to resolve a token
  secret name based on the `target` or `source` input,
  e.g. NIGHTLY_ARCHIVE_REPO_TOKEN or CUSTOM_ARCHIVE_REPO_TOKEN,
  and then fall back to using the ARCHIVE_REPO_TOKEN secret if the
  resolved token secret name was not found in the repository.

  This behavior has been replaced by the release workflow
  always using the ARCHIVE_REPO_TOKEN secret as the token
  for publishing releases to any external archive repository.

* Add zizmor CI job for auditing workflows

* Pin all actions to commit hashes instead of symbolic references

* Explicitly set GITHUB_TOKEN permissions at the job level

* Use actions/checkout with `persist-credentials: false` whenever possible

* Remove/replace template expansions in workflow scripts

* Remove all usage of actions/cache from build/release workflows

* Remove the cache-warmer.yml workflow

* Remove the unused download.yml workflow

* Set concurrency limits for any workflows that are triggered by PRs

* Avoid loading the entire secrets context

* Replace usage of `secrets: inherit` with explicit `secrets:` blocks

* Pin all external docker images used by the build workflow to digest hashes

* Explicitly set `shell: bash` for some steps to avoid pwsh or to enable pipefail

* Ensure any pwsh steps will fail on non-zero exit codes

Authored by: bashonly
2025-12-30 21:05:10 +00:00
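The new zizmor audit job added above can be approximated locally (a sketch; assumes zizmor is installed, e.g. via pipx), using the same pedantic persona the workflow configures:
$ zizmor --persona pedantic .github/workflows/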
bashonly
468aa6a9b4 [ie/youtube] Fix tracking of parent comment among replies (#15439)
Fix d22436e5dc

Closes #15438
Authored by: bashonly
2025-12-30 20:53:33 +00:00
prettysunflower
6c918c5071 [ie/nebula:season] Support more URLs (#15436)
Authored by: prettysunflower
2025-12-30 21:41:19 +01:00
sepro
09078190b0 [ie/iqiyi] Remove broken login support (#15441)
Authored by: seproDev
2025-12-30 15:02:35 +01:00
sepro
4a772e5289 [ie/scte] Remove extractors (#15442)
Authored by: seproDev
2025-12-30 15:01:24 +01:00
cesbar
f24b9ac0c9 [utils] decode_packed_codes: Fix missing key handling (#15440)
Authored by: cesbar
2025-12-30 14:57:42 +01:00
bashonly
2a7e048a60 [ie/facebook] Remove broken login support (#15434)
Authored by: bashonly
2025-12-30 00:48:11 +00:00
bashonly
a6ba714005 [ie/twitter] Remove broken login support (#15432)
Closes #12616
Authored by: bashonly
2025-12-30 00:22:33 +00:00
bashonly
ce9a3591f8 [ie/twitter] Do not extract non-video posts from unified_cards (#15431)
Closes #15402
Authored by: bashonly
2025-12-30 00:20:44 +00:00
bashonly
d22436e5dc [ie/youtube] Support comment subthreads (#15419)
* Support newly rolled out comment "subthreads"
* Fix comments extraction: all replies were being missed
* Add a `max-depth` element to the `max_comments` extractor-arg
* Fully remove the deprecated `max_comment_depth` extractor-arg

Closes #15303
Authored by: bashonly
2025-12-29 21:46:29 +00:00
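A usage sketch matching the README example this commit adds (URL is a placeholder):
# Up to 1000 replies total, 10 replies per thread, and only 2 levels of depth
$ yt-dlp --extractor-args "youtube:max_comments=all,all,1000,10,2" --write-comments URL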
bashonly
abf29e3e72 [ie/youtube] Fix skip_player=js extractor-arg (#15428)
Authored by: bashonly
2025-12-29 21:41:48 +00:00
Mike Fährmann
fcd47d2db3 [ie/picarto] Fix extraction when stream has no title (#15407)
Closes #14540
Authored by: mikf
2025-12-29 02:50:03 +00:00
bashonly
cea825e7e0 [ie/generic] Improve detection of blockage due to TLS fingerprint (#15426)
Authored by: bashonly
2025-12-29 01:02:09 +00:00
sepro
c0a7c594a9 [utils] mimetype2ext: Recognize more srt types (#15411)
Authored by: seproDev
2025-12-26 19:00:45 +01:00
sepro
6b23305822 [ie/manoto] Remove extractor (#15414)
Authored by: seproDev
2025-12-26 18:57:08 +01:00
sepro
6d92f87ddc [ie/cda] Support mobile URLs (#15398)
Closes #15397
Authored by: seproDev
2025-12-25 02:25:03 +01:00
sepro
9bf040dc6f [utils] random_user_agent: Bump versions (#15396)
Authored by: seproDev
2025-12-24 21:47:50 +01:00
doe1080
15263d049c [utils] unified_timestamp: Add tz_offset parameter (#15357)
Allows datetime strings without a timezone to be parsed with the correct offset

Authored by: doe1080
2025-12-20 19:52:53 +00:00
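A sketch of the new parameter, using the values asserted in the test added below (a timezone-less string parsed as UTC+8):
$ python -c "from yt_dlp.utils import unified_timestamp; print(unified_timestamp('2026-01-01 00:00:00', tz_offset=8))"
1767196800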
0x∅
0ea6cc6d82 [ie/netease:program] Support DJ URLs (#15365)
Closes #15364
Authored by: 0xvd
2025-12-20 10:09:22 +00:00
0x∅
e9d4b22b9b [ie/bandcamp:weekly] Fix extractor (#15208)
Closes #13963
Authored by: 0xvd, bashonly

Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2025-12-20 03:54:08 +00:00
0x∅
97fb78a5b9 [ie/yahoo] Fix extractor (#15314)
Closes #15211
Authored by: 0xvd, bashonly

Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2025-12-20 02:58:47 +00:00
0x∅
f5270705e8 [ie/nebula:season] Add extractor (#15347)
Closes #15343
Authored by: 0xvd, bashonly

Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2025-12-20 01:51:09 +00:00
bashonly
a6a8f6b6d6 [ci] Explicitly declare permissions and limit credentials (#15324)
Authored by: bashonly
2025-12-19 19:22:23 +00:00
bashonly
825648a740 [build] Bump official actions to latest versions (#15305)
* Bump actions/cache → v5
* Bump actions/upload-artifact → v6
* Bump actions/download-artifact → v7

Authored by: bashonly
2025-12-19 19:04:52 +00:00
bashonly
e0bb477732 Bypass interactive format selection if no formats are found (#15278)
Authored by: bashonly
2025-12-19 18:57:55 +00:00
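A sketch of the affected invocation (URL is a placeholder): with `-f -` (interactive format selection) and `--ignore-no-formats-error`, the selector prompt is now skipped when extraction yields no formats:
$ yt-dlp -f - --ignore-no-formats-error URL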
delta
c0c9cac554 [ie/filmarchiv] Add extractor (#13490)
Closes #14821
Authored by: 4elta
2025-12-19 00:44:58 +00:00
0x∅
f0bc71abf6 [ie/tubitv] Support URLs with locales (#15205)
Closes #15176
Authored by: 0xvd
2025-12-19 00:26:53 +00:00
0x∅
8a4b626daf [ie/dropbox] Support videos in folders (#15313)
Closes #15312
Authored by: 0xvd
2025-12-19 00:24:13 +00:00
0x∅
f6dc7d5279 Accept float values for --sleep-subtitles (#15282)
Closes #15269
Authored by: 0xvd
2025-12-18 23:42:50 +00:00
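A usage sketch (URL is a placeholder; 0.5 is an illustrative value now accepted):
# Sleep half a second before each subtitle download
$ yt-dlp --sleep-subtitles 0.5 --write-subs URL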
quietvoid
c5e55e0479 [ie/gofile] Fix extractor (#15296)
Authored by: quietvoid
2025-12-18 23:42:13 +00:00
doe1080
6d4984e64e [ie/nextmedia] Remove extractors (#15354)
Authored by: doe1080
2025-12-18 21:36:15 +00:00
doe1080
a27ec9efc6 [ie/netzkino] Rework extractor (#15351)
Authored by: doe1080
2025-12-18 21:32:54 +00:00
bashonly
ff61bef041 [ie/youtube:tab] Fix flat thumbnails extraction for shorts (#15331)
Closes #15329
Authored by: bashonly
2025-12-15 22:37:25 +00:00
sepro
04f2ec4b97 [ie/parti] Fix extractors (#15319)
Authored by: seproDev
2025-12-13 20:00:56 +01:00
0x∅
b6f24745bf [ie/telecinco] Fix extractor (#15311)
Closes #15240
Authored by: 0xvd, bashonly

Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2025-12-12 22:25:45 +00:00
norepro
f2ee2a46fc [ie/pornhub] Optimize metadata extraction (#15231)
Closes #14621
Authored by: norepro
2025-12-12 20:52:09 +00:00
bashonly
5f37f67d37 [ie/archive.org] Fix metadata extraction (#15286)
Closes #15280
Authored by: bashonly
2025-12-09 19:05:12 +00:00
66 changed files with 1530 additions and 1799 deletions

View File

@@ -1,5 +1,4 @@
config-variables:
- KEEP_CACHE_WARM
- PUSH_VERSION_COMMIT
- UPDATE_TO_VERIFICATION
- PYPI_PROJECT

View File

@@ -74,11 +74,11 @@ on:
default: true
type: boolean
permissions:
contents: read
permissions: {}
jobs:
process:
name: Process
runs-on: ubuntu-latest
outputs:
origin: ${{ steps.process_inputs.outputs.origin }}
@@ -146,7 +146,6 @@ jobs:
'runner': 'ubuntu-24.04-arm',
'qemu_platform': 'linux/arm/v7',
'onefile': False,
'cache_requirements': True,
'update_to': 'yt-dlp/yt-dlp@2023.03.04',
}],
'musllinux': [{
@@ -175,7 +174,6 @@ jobs:
exe.setdefault('qemu_platform', None)
exe.setdefault('onefile', True)
exe.setdefault('onedir', True)
exe.setdefault('cache_requirements', False)
exe.setdefault('python_version', os.environ['PYTHON_VERSION'])
exe.setdefault('update_to', os.environ['UPDATE_TO'])
if not any(INPUTS.get(key) for key in EXE_MAP):
@@ -186,8 +184,11 @@ jobs:
f.write(f'matrix={json.dumps(matrix)}')
unix:
needs: process
name: unix
needs: [process]
if: inputs.unix
permissions:
contents: read
runs-on: ubuntu-latest
env:
CHANNEL: ${{ inputs.channel }}
@@ -196,11 +197,12 @@ jobs:
UPDATE_TO: yt-dlp/yt-dlp@2025.09.05
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
with:
fetch-depth: 0 # Needed for changelog
persist-credentials: false
- uses: actions/setup-python@v6
- uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
with:
python-version: "3.10"
@@ -229,7 +231,7 @@ jobs:
[[ "${version}" != "${downgraded_version}" ]]
- name: Upload artifacts
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: build-bin-${{ github.job }}
path: |
@@ -239,8 +241,10 @@ jobs:
linux:
name: ${{ matrix.os }} (${{ matrix.arch }})
needs: [process]
if: inputs.linux || inputs.linux_armv7l || inputs.musllinux
needs: process
permissions:
contents: read
runs-on: ${{ matrix.runner }}
strategy:
fail-fast: false
@@ -257,26 +261,16 @@ jobs:
SKIP_ONEFILE_BUILD: ${{ (!matrix.onefile && '1') || '' }}
steps:
- uses: actions/checkout@v6
- name: Cache requirements
if: matrix.cache_requirements
id: cache-venv
uses: actions/cache@v4
env:
SEGMENT_DOWNLOAD_TIMEOUT_MINS: 1
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
with:
path: |
venv
key: cache-reqs-${{ matrix.os }}_${{ matrix.arch }}-${{ github.ref }}-${{ needs.process.outputs.timestamp }}
restore-keys: |
cache-reqs-${{ matrix.os }}_${{ matrix.arch }}-${{ github.ref }}-
cache-reqs-${{ matrix.os }}_${{ matrix.arch }}-
persist-credentials: false
- name: Set up QEMU
if: matrix.qemu_platform
uses: docker/setup-qemu-action@v3
uses: docker/setup-qemu-action@c7c53464625b32c7a7e944ae62b3e17d2b600130 # v3.7.0
with:
image: tonistiigi/binfmt:qemu-v10.0.4-56@sha256:30cc9a4d03765acac9be2ed0afc23af1ad018aed2c28ea4be8c2eb9afe03fbd1
cache-image: false
platforms: ${{ matrix.qemu_platform }}
- name: Build executable
@@ -300,7 +294,7 @@ jobs:
docker compose up --build --exit-code-from "${SERVICE}" "${SERVICE}"
- name: Upload artifacts
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: build-bin-${{ matrix.os }}_${{ matrix.arch }}
path: |
@@ -308,7 +302,8 @@ jobs:
compression-level: 0
macos:
needs: process
name: macos
needs: [process]
if: inputs.macos
permissions:
contents: read
@@ -320,21 +315,11 @@ jobs:
UPDATE_TO: yt-dlp/yt-dlp@2025.09.05
steps:
- uses: actions/checkout@v6
# NB: Building universal2 does not work with python from actions/setup-python
- name: Cache requirements
id: cache-venv
uses: actions/cache@v4
env:
SEGMENT_DOWNLOAD_TIMEOUT_MINS: 1
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
with:
path: |
~/yt-dlp-build-venv
key: cache-reqs-${{ github.job }}-${{ github.ref }}-${{ needs.process.outputs.timestamp }}
restore-keys: |
cache-reqs-${{ github.job }}-${{ github.ref }}-
cache-reqs-${{ github.job }}-
persist-credentials: false
# NB: Building universal2 does not work with python from actions/setup-python
- name: Install Requirements
run: |
@@ -399,7 +384,7 @@ jobs:
[[ "$version" != "$downgraded_version" ]]
- name: Upload artifacts
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: build-bin-${{ github.job }}
path: |
@@ -409,7 +394,7 @@ jobs:
windows:
name: windows (${{ matrix.arch }})
needs: process
needs: [process]
if: inputs.windows
permissions:
contents: read
@@ -450,26 +435,15 @@ jobs:
PYI_WHEEL: pyinstaller-${{ matrix.pyi_version }}-py3-none-${{ matrix.platform_tag }}.whl
steps:
- uses: actions/checkout@v6
- uses: actions/setup-python@v6
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
with:
persist-credentials: false
- uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
with:
python-version: ${{ matrix.python_version }}
architecture: ${{ matrix.arch }}
- name: Cache requirements
id: cache-venv
if: matrix.arch == 'arm64'
uses: actions/cache@v4
env:
SEGMENT_DOWNLOAD_TIMEOUT_MINS: 1
with:
path: |
/yt-dlp-build-venv
key: ${{ env.BASE_CACHE_KEY }}-${{ github.ref }}-${{ needs.process.outputs.timestamp }}
restore-keys: |
${{ env.BASE_CACHE_KEY }}-${{ github.ref }}-
${{ env.BASE_CACHE_KEY }}-
- name: Install Requirements
env:
ARCH: ${{ matrix.arch }}
@@ -477,6 +451,8 @@ jobs:
PYI_HASH: ${{ matrix.pyi_hash }}
shell: pwsh
run: |
$ErrorActionPreference = "Stop"
$PSNativeCommandUseErrorActionPreference = $true
python -m venv /yt-dlp-build-venv
/yt-dlp-build-venv/Scripts/Activate.ps1
python -m pip install -U pip
@@ -494,12 +470,16 @@ jobs:
- name: Prepare
shell: pwsh
run: |
$ErrorActionPreference = "Stop"
$PSNativeCommandUseErrorActionPreference = $true
python devscripts/update-version.py -c "${Env:CHANNEL}" -r "${Env:ORIGIN}" "${Env:VERSION}"
python devscripts/make_lazy_extractors.py
- name: Build
shell: pwsh
run: |
$ErrorActionPreference = "Stop"
$PSNativeCommandUseErrorActionPreference = $true
/yt-dlp-build-venv/Scripts/Activate.ps1
python -m bundle.pyinstaller
python -m bundle.pyinstaller --onedir
@@ -509,6 +489,8 @@ jobs:
if: vars.UPDATE_TO_VERIFICATION
shell: pwsh
run: |
$ErrorActionPreference = "Stop"
$PSNativeCommandUseErrorActionPreference = $true
$name = "yt-dlp${Env:SUFFIX}"
Copy-Item "./dist/${name}.exe" "./dist/${name}_downgraded.exe"
$version = & "./dist/${name}.exe" --version
@@ -519,7 +501,7 @@ jobs:
}
- name: Upload artifacts
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: build-bin-${{ github.job }}-${{ matrix.arch }}
path: |
@@ -528,23 +510,25 @@ jobs:
compression-level: 0
meta_files:
if: always() && !cancelled()
name: Metadata files
needs:
- process
- unix
- linux
- macos
- windows
if: always() && !failure() && !cancelled()
runs-on: ubuntu-latest
steps:
- name: Download artifacts
uses: actions/download-artifact@v5
uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
with:
path: artifact
pattern: build-bin-*
merge-multiple: true
- name: Make SHA2-SUMS files
shell: bash
run: |
cd ./artifact/
# make sure SHA sums are also printed to stdout
@@ -600,13 +584,13 @@ jobs:
GPG_SIGNING_KEY: ${{ secrets.GPG_SIGNING_KEY }}
if: env.GPG_SIGNING_KEY
run: |
gpg --batch --import <<< "${{ secrets.GPG_SIGNING_KEY }}"
gpg --batch --import <<< "${GPG_SIGNING_KEY}"
for signfile in ./SHA*SUMS; do
gpg --batch --detach-sign "$signfile"
done
- name: Upload artifacts
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: build-${{ github.job }}
path: |

View File

@@ -1,23 +0,0 @@
name: Keep cache warm
on:
workflow_dispatch:
schedule:
- cron: '0 22 1,6,11,16,21,27 * *'
jobs:
build:
if: |
vars.KEEP_CACHE_WARM || github.event_name == 'workflow_dispatch'
uses: ./.github/workflows/build.yml
with:
version: '999999'
channel: stable
origin: ${{ github.repository }}
unix: false
linux: false
linux_armv7l: true
musllinux: false
macos: true
windows: true
permissions:
contents: read

View File

@@ -16,8 +16,8 @@ on:
- yt_dlp/extractor/youtube/jsc/**.py
- yt_dlp/extractor/youtube/pot/**.py
- yt_dlp/utils/_jsruntime.py
permissions:
contents: read
permissions: {}
concurrency:
group: challenge-tests-${{ github.event.pull_request.number || github.ref }}
@@ -26,6 +26,8 @@ concurrency:
jobs:
tests:
name: Challenge Tests
permissions:
contents: read
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
@@ -35,26 +37,30 @@ jobs:
env:
QJS_VERSION: '2025-04-26' # Earliest version with rope strings
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
with:
persist-credentials: false
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v6
uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
with:
python-version: ${{ matrix.python-version }}
- name: Install Deno
uses: denoland/setup-deno@v2
uses: denoland/setup-deno@e95548e56dfa95d4e1a28d6f422fafe75c4c26fb # v2.0.3
with:
deno-version: '2.0.0' # minimum supported version
- name: Install Bun
uses: oven-sh/setup-bun@v2
uses: oven-sh/setup-bun@735343b667d3e6f658f44d0eca948eb6282f2b76 # v2.0.2
with:
# minimum supported version is 1.0.31 but earliest available Windows version is 1.1.0
bun-version: ${{ (matrix.os == 'windows-latest' && '1.1.0') || '1.0.31' }}
no-cache: true
- name: Install Node
uses: actions/setup-node@v6
uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # v6.1.0
with:
node-version: '20.0' # minimum supported version
- name: Install QuickJS (Linux)
if: matrix.os == 'ubuntu-latest'
shell: bash
run: |
wget "https://bellard.org/quickjs/binary_releases/quickjs-linux-x86_64-${QJS_VERSION}.zip" -O quickjs.zip
unzip quickjs.zip qjs
@@ -63,15 +69,19 @@ jobs:
if: matrix.os == 'windows-latest'
shell: pwsh
run: |
$ErrorActionPreference = "Stop"
$PSNativeCommandUseErrorActionPreference = $true
Invoke-WebRequest "https://bellard.org/quickjs/binary_releases/quickjs-win-x86_64-${Env:QJS_VERSION}.zip" -OutFile quickjs.zip
unzip quickjs.zip
- name: Install test requirements
shell: bash
run: |
python ./devscripts/install_deps.py --print --omit-default --include-extra test > requirements.txt
python ./devscripts/install_deps.py --print -c certifi -c requests -c urllib3 -c yt-dlp-ejs >> requirements.txt
python -m pip install -U -r requirements.txt
- name: Run tests
timeout-minutes: 15
shell: bash
run: |
python -m yt_dlp -v --js-runtimes node --js-runtimes bun --js-runtimes quickjs || true
python ./devscripts/run_tests.py test/test_jsc -k download

View File

@@ -9,14 +9,20 @@ on:
schedule:
- cron: '59 11 * * 5'
permissions: {}
concurrency:
group: codeql-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
jobs:
analyze:
name: Analyze (${{ matrix.language }})
runs-on: ubuntu-latest
permissions:
actions: read
actions: read # Needed by github/codeql-action if repository is private
contents: read
security-events: write
security-events: write # Needed to use github/codeql-action with Github Advanced Security
strategy:
fail-fast: false
@@ -25,15 +31,17 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
with:
persist-credentials: false
- name: Initialize CodeQL
uses: github/codeql-action/init@v4
uses: github/codeql-action/init@5d4e8d1aca955e8d8589aabd499c5cae939e33c7 # v4.31.9
with:
languages: ${{ matrix.language }}
build-mode: none
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v4
uses: github/codeql-action/analyze@5d4e8d1aca955e8d8589aabd499c5cae939e33c7 # v4.31.9
with:
category: "/language:${{matrix.language}}"

View File

@@ -22,8 +22,8 @@ on:
- yt_dlp/extractor/__init__.py
- yt_dlp/extractor/common.py
- yt_dlp/extractor/extractors.py
permissions:
contents: read
permissions: {}
concurrency:
group: core-${{ github.event.pull_request.number || github.ref }}
@@ -33,6 +33,8 @@ jobs:
tests:
name: Core Tests
if: "!contains(github.event.head_commit.message, 'ci skip')"
permissions:
contents: read
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
@@ -55,11 +57,12 @@ jobs:
- os: windows-latest
python-version: pypy-3.11
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
with:
fetch-depth: 0
persist-credentials: false
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v6
uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
with:
python-version: ${{ matrix.python-version }}
- name: Install test requirements

View File

@@ -1,48 +0,0 @@
name: Download Tests
on: [push, pull_request]
permissions:
contents: read
jobs:
quick:
name: Quick Download Tests
if: "contains(github.event.head_commit.message, 'ci run dl')"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: '3.10'
- name: Install test requirements
run: python ./devscripts/install_deps.py --include-extra dev
- name: Run tests
continue-on-error: true
run: python ./devscripts/run_tests.py download
full:
name: Full Download Tests
if: "contains(github.event.head_commit.message, 'ci run dl all')"
runs-on: ${{ matrix.os }}
strategy:
fail-fast: true
matrix:
os: [ubuntu-latest]
python-version: ['3.11', '3.12', '3.13', '3.14', pypy-3.11]
include:
# atleast one of each CPython/PyPy tests must be in windows
- os: windows-latest
python-version: '3.10'
- os: windows-latest
python-version: pypy-3.11
steps:
- uses: actions/checkout@v6
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python-version }}
- name: Install test requirements
run: python ./devscripts/install_deps.py --include-extra dev
- name: Run tests
continue-on-error: true
run: python ./devscripts/run_tests.py download

View File

@@ -3,13 +3,14 @@ on:
issues:
types: [opened]
permissions:
issues: write
permissions: {}
jobs:
lockdown:
name: Issue Lockdown
if: vars.ISSUE_LOCKDOWN
permissions:
issues: write # Needed to lock issues
runs-on: ubuntu-latest
steps:
- name: "Lock new issue"

View File

@@ -1,33 +1,47 @@
name: Quick Test
on: [push, pull_request]
permissions:
contents: read
permissions: {}
concurrency:
group: quick-test-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
jobs:
tests:
name: Core Test
if: "!contains(github.event.head_commit.message, 'ci skip all')"
permissions:
contents: read
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
with:
persist-credentials: false
- name: Set up Python 3.10
uses: actions/setup-python@v6
uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
with:
python-version: '3.10'
- name: Install test requirements
shell: bash
run: python ./devscripts/install_deps.py --omit-default --include-extra test
- name: Run tests
timeout-minutes: 15
shell: bash
run: |
python3 -m yt_dlp -v || true
python3 ./devscripts/run_tests.py --pytest-args '--reruns 2 --reruns-delay 3.0' core
check:
name: Code check
if: "!contains(github.event.head_commit.message, 'ci skip all')"
permissions:
contents: read
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- uses: actions/setup-python@v6
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
with:
persist-credentials: false
- uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
with:
python-version: '3.10'
- name: Install dev dependencies
@@ -39,4 +53,5 @@ jobs:
- name: Run autopep8
run: autopep8 --diff .
- name: Check file mode
shell: bash
run: git ls-files --format="%(objectmode) %(path)" yt_dlp/ | ( ! grep -v "^100644" )

View File

@@ -14,35 +14,39 @@ on:
- ".github/workflows/release-master.yml"
concurrency:
group: release-master
permissions:
contents: read
permissions: {}
jobs:
release:
name: Publish Github release
if: vars.BUILD_MASTER
permissions:
contents: write # May be needed to publish release
id-token: write # Needed for trusted publishing
uses: ./.github/workflows/release.yml
with:
prerelease: true
source: ${{ (github.repository != 'yt-dlp/yt-dlp' && vars.MASTER_ARCHIVE_REPO) || 'master' }}
target: 'master'
permissions:
contents: write
id-token: write # mandatory for trusted publishing
secrets: inherit
secrets:
ARCHIVE_REPO_TOKEN: ${{ secrets.ARCHIVE_REPO_TOKEN }}
GPG_SIGNING_KEY: ${{ secrets.GPG_SIGNING_KEY }}
publish_pypi:
name: Publish to PyPI
needs: [release]
if: vars.MASTER_PYPI_PROJECT
runs-on: ubuntu-latest
permissions:
id-token: write # mandatory for trusted publishing
id-token: write # Needed for trusted publishing
runs-on: ubuntu-latest
steps:
- name: Download artifacts
uses: actions/download-artifact@v5
uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
with:
path: dist
name: build-pypi
- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0
with:
verbose: true

View File

@@ -2,21 +2,43 @@ name: Release (nightly)
on:
schedule:
- cron: '23 23 * * *'
permissions:
contents: read
workflow_dispatch:
permissions: {}
jobs:
check_nightly:
name: Check for new commits
if: vars.BUILD_NIGHTLY
permissions:
contents: read
runs-on: ubuntu-latest
outputs:
commit: ${{ steps.check_for_new_commits.outputs.commit }}
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
with:
fetch-depth: 0
persist-credentials: false
- name: Retrieve HEAD commit hash
id: head
shell: bash
run: echo "head=$(git rev-parse HEAD)" | tee -a "${GITHUB_OUTPUT}"
- name: Cache nightly commit hash
uses: actions/cache@9255dc7a253b0ccc959486e2bca901246202afeb # v5.0.1
env:
SEGMENT_DOWNLOAD_TIMEOUT_MINS: 1
with:
path: .nightly_commit_hash
key: release-nightly-${{ steps.head.outputs.head }}
restore-keys: |
release-nightly-
- name: Check for new commits
id: check_for_new_commits
shell: bash
run: |
relevant_files=(
"yt_dlp/*.py"
@@ -30,34 +52,54 @@ jobs:
".github/workflows/release.yml"
".github/workflows/release-nightly.yml"
)
echo "commit=$(git log --format=%H -1 --since="24 hours ago" -- "${relevant_files[@]}")" | tee "$GITHUB_OUTPUT"
if [[ -f .nightly_commit_hash ]]; then
limit_args=(
"$(cat .nightly_commit_hash)..HEAD"
)
else
limit_args=(
--since="24 hours ago"
)
fi
echo "commit=$(git log --format=%H -1 "${limit_args[@]}" -- "${relevant_files[@]}")" | tee -a "${GITHUB_OUTPUT}"
- name: Record new nightly commit hash
env:
HEAD: ${{ steps.head.outputs.head }}
shell: bash
run: echo "${HEAD}" | tee .nightly_commit_hash
release:
name: Publish Github release
needs: [check_nightly]
if: ${{ needs.check_nightly.outputs.commit }}
permissions:
contents: write # May be needed to publish release
id-token: write # Needed for trusted publishing
uses: ./.github/workflows/release.yml
with:
prerelease: true
source: ${{ (github.repository != 'yt-dlp/yt-dlp' && vars.NIGHTLY_ARCHIVE_REPO) || 'nightly' }}
target: 'nightly'
permissions:
contents: write
id-token: write # mandatory for trusted publishing
secrets: inherit
secrets:
ARCHIVE_REPO_TOKEN: ${{ secrets.ARCHIVE_REPO_TOKEN }}
GPG_SIGNING_KEY: ${{ secrets.GPG_SIGNING_KEY }}
publish_pypi:
name: Publish to PyPI
needs: [release]
if: vars.NIGHTLY_PYPI_PROJECT
runs-on: ubuntu-latest
permissions:
id-token: write # mandatory for trusted publishing
id-token: write # Needed for trusted publishing
runs-on: ubuntu-latest
steps:
- name: Download artifacts
uses: actions/download-artifact@v5
uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
with:
path: dist
name: build-pypi
- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0
with:
verbose: true

View File

@@ -22,6 +22,11 @@ on:
required: false
default: true
type: boolean
secrets:
ARCHIVE_REPO_TOKEN:
required: false
GPG_SIGNING_KEY:
required: false
workflow_dispatch:
inputs:
source:
@@ -56,30 +61,30 @@ on:
default: false
type: boolean
permissions:
contents: read
permissions: {}
jobs:
prepare:
name: Prepare
permissions:
contents: write
contents: write # Needed to git-push the release commit
runs-on: ubuntu-latest
outputs:
channel: ${{ steps.setup_variables.outputs.channel }}
version: ${{ steps.setup_variables.outputs.version }}
target_repo: ${{ steps.setup_variables.outputs.target_repo }}
target_repo_token: ${{ steps.setup_variables.outputs.target_repo_token }}
target_tag: ${{ steps.setup_variables.outputs.target_tag }}
pypi_project: ${{ steps.setup_variables.outputs.pypi_project }}
pypi_suffix: ${{ steps.setup_variables.outputs.pypi_suffix }}
head_sha: ${{ steps.get_target.outputs.head_sha }}
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
with:
fetch-depth: 0
persist-credentials: true # Needed to git-push the release commit
- uses: actions/setup-python@v6
- uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
with:
python-version: "3.10" # Keep this in sync with test-workflows.yml
@@ -104,8 +109,6 @@ jobs:
TARGET_PYPI_SUFFIX: ${{ vars[format('{0}_pypi_suffix', steps.process_inputs.outputs.target_repo)] }}
SOURCE_ARCHIVE_REPO: ${{ vars[format('{0}_archive_repo', steps.process_inputs.outputs.source_repo)] }}
TARGET_ARCHIVE_REPO: ${{ vars[format('{0}_archive_repo', steps.process_inputs.outputs.target_repo)] }}
HAS_SOURCE_ARCHIVE_REPO_TOKEN: ${{ !!secrets[format('{0}_archive_repo_token', steps.process_inputs.outputs.source_repo)] }}
HAS_TARGET_ARCHIVE_REPO_TOKEN: ${{ !!secrets[format('{0}_archive_repo_token', steps.process_inputs.outputs.target_repo)] }}
HAS_ARCHIVE_REPO_TOKEN: ${{ !!secrets.ARCHIVE_REPO_TOKEN }}
run: |
python -m devscripts.setup_variables
@@ -150,30 +153,34 @@ jobs:
run: git push origin "${GITHUB_EVENT_REF}"
build:
needs: prepare
name: Build
needs: [prepare]
permissions:
contents: read
uses: ./.github/workflows/build.yml
with:
version: ${{ needs.prepare.outputs.version }}
channel: ${{ needs.prepare.outputs.channel }}
origin: ${{ needs.prepare.outputs.target_repo }}
linux_armv7l: ${{ inputs.linux_armv7l }}
permissions:
contents: read
secrets:
GPG_SIGNING_KEY: ${{ secrets.GPG_SIGNING_KEY }}
publish_pypi:
name: Publish to PyPI
needs: [prepare, build]
if: ${{ needs.prepare.outputs.pypi_project }}
runs-on: ubuntu-latest
permissions:
id-token: write # mandatory for trusted publishing
contents: read
id-token: write # Needed for trusted publishing
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
with:
fetch-depth: 0
- uses: actions/setup-python@v6
fetch-depth: 0 # Needed for changelog
persist-credentials: false
- uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
with:
python-version: "3.10"
@@ -208,8 +215,8 @@ jobs:
python -m build --no-isolation .
- name: Upload artifacts
if: github.event_name != 'workflow_dispatch'
uses: actions/upload-artifact@v4
if: github.event.workflow != '.github/workflows/release.yml' # Reusable workflow_call
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: build-pypi
path: |
@@ -217,15 +224,16 @@ jobs:
compression-level: 0
- name: Publish to PyPI
if: github.event_name == 'workflow_dispatch'
uses: pypa/gh-action-pypi-publish@release/v1
if: github.event.workflow == '.github/workflows/release.yml' # Direct workflow_dispatch
uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0
with:
verbose: true
publish:
name: Publish Github release
needs: [prepare, build]
permissions:
contents: write
contents: write # Needed by gh to publish release to Github
runs-on: ubuntu-latest
env:
TARGET_REPO: ${{ needs.prepare.outputs.target_repo }}
@@ -233,15 +241,16 @@ jobs:
VERSION: ${{ needs.prepare.outputs.version }}
HEAD_SHA: ${{ needs.prepare.outputs.head_sha }}
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
with:
fetch-depth: 0
- uses: actions/download-artifact@v5
persist-credentials: false
- uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
with:
path: artifact
pattern: build-*
merge-multiple: true
- uses: actions/setup-python@v6
- uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
with:
python-version: "3.10"
@@ -282,7 +291,7 @@ jobs:
- name: Publish to archive repo
env:
GH_TOKEN: ${{ secrets[needs.prepare.outputs.target_repo_token] }}
GH_TOKEN: ${{ secrets.ARCHIVE_REPO_TOKEN }}
GH_REPO: ${{ needs.prepare.outputs.target_repo }}
TITLE_PREFIX: ${{ startswith(env.TARGET_REPO, 'yt-dlp/') && 'yt-dlp ' || '' }}
TITLE: ${{ inputs.target != env.TARGET_REPO && inputs.target || needs.prepare.outputs.channel }}

View File

@@ -4,14 +4,15 @@ on:
issue_comment:
types: [created, edited]
permissions:
issues: write
permissions: {}
jobs:
sanitize-comment:
name: Sanitize comment
if: vars.SANITIZE_COMMENT && !github.event.issue.pull_request
permissions:
issues: write # Needed by yt-dlp/sanitize-comment to edit comments
runs-on: ubuntu-latest
steps:
- name: Sanitize comment
uses: yt-dlp/sanitize-comment@v1
uses: yt-dlp/sanitize-comment@4536c691101b89f5373d50fe8a7980cae146346b # v1.0.0

View File

@@ -1,21 +1,30 @@
name: Test and lint workflows
on:
push:
branches: [master]
paths:
- .github/*.yml
- .github/workflows/*
- bundle/docker/linux/*.sh
- devscripts/setup_variables.py
- devscripts/setup_variables_tests.py
- devscripts/utils.py
pull_request:
branches: [master]
paths:
- .github/*.yml
- .github/workflows/*
- bundle/docker/linux/*.sh
- devscripts/setup_variables.py
- devscripts/setup_variables_tests.py
- devscripts/utils.py
permissions:
contents: read
permissions: {}
concurrency:
group: test-workflows-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
env:
ACTIONLINT_VERSION: "1.7.9"
ACTIONLINT_SHA256SUM: 233b280d05e100837f4af1433c7b40a5dcb306e3aa68fb4f17f8a7f45a7df7b4
@@ -24,15 +33,20 @@ env:
jobs:
check:
name: Check workflows
permissions:
contents: read
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- uses: actions/setup-python@v6
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
with:
persist-credentials: false
- uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
with:
python-version: "3.10" # Keep this in sync with release.yml's prepare job
- name: Install requirements
env:
ACTIONLINT_TARBALL: ${{ format('actionlint_{0}_linux_amd64.tar.gz', env.ACTIONLINT_VERSION) }}
shell: bash
run: |
python -m devscripts.install_deps --omit-default --include-extra test
sudo apt -y install shellcheck
@@ -50,3 +64,20 @@ jobs:
- name: Test GHA devscripts
run: |
pytest -Werror --tb=short --color=yes devscripts/setup_variables_tests.py
zizmor:
name: Run zizmor
permissions:
contents: read
actions: read # Needed by zizmorcore/zizmor-action if repository is private
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
with:
persist-credentials: false
- name: Run zizmor
uses: zizmorcore/zizmor-action@e639db99335bc9038abc0e066dfcd72e23d26fb4 # v0.3.0
with:
advanced-security: false
persona: pedantic
version: v1.19.0

.github/zizmor.yml (new file, 15 lines added)
View File

@@ -0,0 +1,15 @@
rules:
concurrency-limits:
ignore:
- build.yml # Can only be triggered by maintainers or cronjob
- issue-lockdown.yml # It *should* run for *every* new issue
- release-nightly.yml # Can only be triggered by once-daily cronjob
- release.yml # Can only be triggered by maintainers or cronjob
- sanitize-comment.yml # It *should* run for *every* new comment/edit
obfuscation:
ignore:
- release.yml # Not actual obfuscation
unpinned-uses:
config:
policies:
"*": hash-pin

View File

@@ -1351,6 +1351,7 @@ The available fields are:
- `repost_count` (numeric): Number of reposts of the video
- `average_rating` (numeric): Average rating given by users, the scale used depends on the webpage
- `comment_count` (numeric): Number of comments on the video (For some extractors, comments are only downloaded at the end, and so this field cannot be used)
- `save_count` (numeric): Number of times the video has been saved or bookmarked
- `age_limit` (numeric): Age restriction for the video (years)
- `live_status` (string): One of "not_live", "is_live", "is_upcoming", "was_live", "post_live" (was live, but VOD is not yet processed)
- `is_live` (boolean): Whether this video is a live stream or a fixed-length video
@@ -1820,6 +1821,9 @@ $ yt-dlp --parse-metadata "title:%(artist)s - %(title)s"
# Regex example
$ yt-dlp --parse-metadata "description:Artist - (?P<artist>.+)"
# Copy the episode field to the title field (with FROM and TO as single fields)
$ yt-dlp --parse-metadata "episode:title"
# Set title as "Series name S01E05"
$ yt-dlp --parse-metadata "%(series)s S%(season_number)02dE%(episode_number)02d:%(title)s"
@@ -1859,8 +1863,9 @@ The following extractors use this feature:
* `player_js_variant`: The player javascript variant to use for n/sig deciphering. The known variants are: `main`, `tcc`, `tce`, `es5`, `es6`, `tv`, `tv_es6`, `phone`, `tablet`. The default is `main`, and the others are for debugging purposes. You can use `actual` to go with what is prescribed by the site
* `player_js_version`: The player javascript version to use for n/sig deciphering, in the format of `signature_timestamp@hash` (e.g. `20348@0004de42`). The default is to use what is prescribed by the site, and can be selected with `actual`
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
* `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all`
* E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total
* `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread,max-depth`. Default is `all,all,all,all,all`
* A `max-depth` value of `1` will discard all replies, regardless of the `max-replies` or `max-replies-per-thread` values given
* E.g. `all,all,1000,10,2` will get a maximum of 1000 replies total, with up to 10 replies per thread, and only 2 levels of depth (i.e. top-level comments plus their immediate replies). `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total
* `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8), `missing_pot` (include formats that require a PO Token but are missing one)
* `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others
* `innertube_key`: Innertube API key to use for all API requests. By default, no API key is used

View File

@@ -26,7 +26,7 @@ services:
platforms:
- "linux/amd64"
args:
VERIFYIMAGE: quay.io/pypa/manylinux2014_x86_64:latest
VERIFYIMAGE: quay.io/pypa/manylinux2014_x86_64:2025.12.19-1@sha256:b716645f9aecd0c1418283af930804bbdbd68a73d855a60101c5aab8548d737d
environment:
EXE_NAME: ${EXE_NAME:?}
UPDATE_TO:
@@ -61,7 +61,7 @@ services:
platforms:
- "linux/arm64"
args:
VERIFYIMAGE: quay.io/pypa/manylinux2014_aarch64:latest
VERIFYIMAGE: quay.io/pypa/manylinux2014_aarch64:2025.12.19-1@sha256:36cbe6638c7c605c2b44a92e35751baa537ec8902112f790139d89c7e1ccd2a4
environment:
EXE_NAME: ${EXE_NAME:?}
UPDATE_TO:
@@ -97,7 +97,7 @@ services:
platforms:
- "linux/arm/v7"
args:
VERIFYIMAGE: arm32v7/debian:bullseye
VERIFYIMAGE: arm32v7/debian:bullseye@sha256:9d544bf6ff73e36b8df1b7e415f6c8ee40ed84a0f3a26970cac8ea88b0ccf2ac
environment:
EXE_NAME: ${EXE_NAME:?}
UPDATE_TO:
@@ -132,7 +132,7 @@ services:
platforms:
- "linux/amd64"
args:
VERIFYIMAGE: alpine:3.22
VERIFYIMAGE: alpine:3.23.2@sha256:865b95f46d98cf867a156fe4a135ad3fe50d2056aa3f25ed31662dff6da4eb62
environment:
EXE_NAME: ${EXE_NAME:?}
UPDATE_TO:
@@ -168,7 +168,7 @@ services:
platforms:
- "linux/arm64"
args:
VERIFYIMAGE: alpine:3.22
VERIFYIMAGE: alpine:3.23.2@sha256:865b95f46d98cf867a156fe4a135ad3fe50d2056aa3f25ed31662dff6da4eb62
environment:
EXE_NAME: ${EXE_NAME:?}
UPDATE_TO:

View File

@@ -21,8 +21,6 @@ def setup_variables(environment):
SOURCE_PYPI_PROJECT, SOURCE_PYPI_SUFFIX,
TARGET_PYPI_PROJECT, TARGET_PYPI_SUFFIX,
SOURCE_ARCHIVE_REPO, TARGET_ARCHIVE_REPO,
HAS_SOURCE_ARCHIVE_REPO_TOKEN,
HAS_TARGET_ARCHIVE_REPO_TOKEN,
HAS_ARCHIVE_REPO_TOKEN
`INPUTS` must contain these keys:
@@ -37,8 +35,6 @@ def setup_variables(environment):
PROCESSED = json.loads(environment['PROCESSED'])
source_channel = None
does_not_have_needed_token = False
target_repo_token = None
pypi_project = None
pypi_suffix = None
@@ -81,28 +77,19 @@ def setup_variables(environment):
target_repo = REPOSITORY
if target_repo != REPOSITORY:
target_repo = environment['TARGET_ARCHIVE_REPO']
target_repo_token = f'{PROCESSED["target_repo"].upper()}_ARCHIVE_REPO_TOKEN'
if not json.loads(environment['HAS_TARGET_ARCHIVE_REPO_TOKEN']):
does_not_have_needed_token = True
pypi_project = environment['TARGET_PYPI_PROJECT'] or None
pypi_suffix = environment['TARGET_PYPI_SUFFIX'] or None
else:
target_tag = source_tag or version
if source_channel:
target_repo = source_channel
target_repo_token = f'{PROCESSED["source_repo"].upper()}_ARCHIVE_REPO_TOKEN'
if not json.loads(environment['HAS_SOURCE_ARCHIVE_REPO_TOKEN']):
does_not_have_needed_token = True
pypi_project = environment['SOURCE_PYPI_PROJECT'] or None
pypi_suffix = environment['SOURCE_PYPI_SUFFIX'] or None
else:
target_repo = REPOSITORY
if does_not_have_needed_token:
if not json.loads(environment['HAS_ARCHIVE_REPO_TOKEN']):
print(f'::error::Repository access secret {target_repo_token} not found')
return None
target_repo_token = 'ARCHIVE_REPO_TOKEN'
if target_repo != REPOSITORY and not json.loads(environment['HAS_ARCHIVE_REPO_TOKEN']):
return None
if target_repo == REPOSITORY and not INPUTS['prerelease']:
pypi_project = environment['PYPI_PROJECT'] or None
@@ -111,7 +98,6 @@ def setup_variables(environment):
'channel': resolved_source,
'version': version,
'target_repo': target_repo,
'target_repo_token': target_repo_token,
'target_tag': target_tag,
'pypi_project': pypi_project,
'pypi_suffix': pypi_suffix,
@@ -147,6 +133,7 @@ if __name__ == '__main__':
outputs = setup_variables(dict(os.environ))
if not outputs:
print('::error::Repository access secret ARCHIVE_REPO_TOKEN not found')
sys.exit(1)
print('::group::Output variables')

View File

@@ -9,8 +9,10 @@ import json
from devscripts.setup_variables import STABLE_REPOSITORY, process_inputs, setup_variables
from devscripts.utils import calculate_version
GENERATE_TEST_DATA = object()
def _test(github_repository, note, repo_vars, repo_secrets, inputs, expected=None, ignore_revision=False):
def _test(github_repository, note, repo_vars, repo_secrets, inputs, expected, ignore_revision=False):
inp = inputs.copy()
inp.setdefault('linux_armv7l', True)
inp.setdefault('prerelease', False)
@@ -33,16 +35,19 @@ def _test(github_repository, note, repo_vars, repo_secrets, inputs, expected=Non
'TARGET_PYPI_SUFFIX': variables.get(f'{target_repo}_PYPI_SUFFIX') or '',
'SOURCE_ARCHIVE_REPO': variables.get(f'{source_repo}_ARCHIVE_REPO') or '',
'TARGET_ARCHIVE_REPO': variables.get(f'{target_repo}_ARCHIVE_REPO') or '',
'HAS_SOURCE_ARCHIVE_REPO_TOKEN': json.dumps(bool(secrets.get(f'{source_repo}_ARCHIVE_REPO_TOKEN'))),
'HAS_TARGET_ARCHIVE_REPO_TOKEN': json.dumps(bool(secrets.get(f'{target_repo}_ARCHIVE_REPO_TOKEN'))),
'HAS_ARCHIVE_REPO_TOKEN': json.dumps(bool(secrets.get('ARCHIVE_REPO_TOKEN'))),
}
result = setup_variables(env)
if not expected:
if expected is GENERATE_TEST_DATA:
print(' {\n' + '\n'.join(f' {k!r}: {v!r},' for k, v in result.items()) + '\n }')
return
if expected is None:
assert result is None, f'expected error/None but got dict: {github_repository} {note}'
return
exp = expected.copy()
if ignore_revision:
assert len(result['version']) == len(exp['version']), f'revision missing: {github_repository} {note}'
@@ -77,7 +82,6 @@ def test_setup_variables():
'channel': 'stable',
'version': DEFAULT_VERSION,
'target_repo': STABLE_REPOSITORY,
'target_repo_token': None,
'target_tag': DEFAULT_VERSION,
'pypi_project': 'yt-dlp',
'pypi_suffix': None,
@@ -91,7 +95,6 @@ def test_setup_variables():
'channel': 'nightly',
'version': DEFAULT_VERSION_WITH_REVISION,
'target_repo': 'yt-dlp/yt-dlp-nightly-builds',
'target_repo_token': 'ARCHIVE_REPO_TOKEN',
'target_tag': DEFAULT_VERSION_WITH_REVISION,
'pypi_project': 'yt-dlp',
'pypi_suffix': 'dev',
@@ -106,7 +109,6 @@ def test_setup_variables():
'channel': 'nightly',
'version': DEFAULT_VERSION_WITH_REVISION,
'target_repo': 'yt-dlp/yt-dlp-nightly-builds',
'target_repo_token': 'ARCHIVE_REPO_TOKEN',
'target_tag': DEFAULT_VERSION_WITH_REVISION,
'pypi_project': 'yt-dlp',
'pypi_suffix': 'dev',
@@ -120,7 +122,6 @@ def test_setup_variables():
'channel': 'master',
'version': DEFAULT_VERSION_WITH_REVISION,
'target_repo': 'yt-dlp/yt-dlp-master-builds',
'target_repo_token': 'ARCHIVE_REPO_TOKEN',
'target_tag': DEFAULT_VERSION_WITH_REVISION,
'pypi_project': None,
'pypi_suffix': None,
@@ -135,7 +136,6 @@ def test_setup_variables():
'channel': 'master',
'version': DEFAULT_VERSION_WITH_REVISION,
'target_repo': 'yt-dlp/yt-dlp-master-builds',
'target_repo_token': 'ARCHIVE_REPO_TOKEN',
'target_tag': DEFAULT_VERSION_WITH_REVISION,
'pypi_project': None,
'pypi_suffix': None,
@@ -149,7 +149,6 @@ def test_setup_variables():
'channel': 'stable',
'version': DEFAULT_VERSION_WITH_REVISION,
'target_repo': STABLE_REPOSITORY,
'target_repo_token': None,
'target_tag': 'experimental',
'pypi_project': None,
'pypi_suffix': None,
@@ -163,7 +162,6 @@ def test_setup_variables():
'channel': 'stable',
'version': DEFAULT_VERSION_WITH_REVISION,
'target_repo': STABLE_REPOSITORY,
'target_repo_token': None,
'target_tag': 'experimental',
'pypi_project': None,
'pypi_suffix': None,
@@ -175,7 +173,6 @@ def test_setup_variables():
'channel': FORK_REPOSITORY,
'version': DEFAULT_VERSION_WITH_REVISION,
'target_repo': FORK_REPOSITORY,
'target_repo_token': None,
'target_tag': DEFAULT_VERSION_WITH_REVISION,
'pypi_project': None,
'pypi_suffix': None,
@@ -186,7 +183,6 @@ def test_setup_variables():
'channel': FORK_REPOSITORY,
'version': DEFAULT_VERSION_WITH_REVISION,
'target_repo': FORK_REPOSITORY,
'target_repo_token': None,
'target_tag': DEFAULT_VERSION_WITH_REVISION,
'pypi_project': None,
'pypi_suffix': None,
@@ -201,7 +197,6 @@ def test_setup_variables():
'channel': f'{FORK_REPOSITORY}@nightly',
'version': DEFAULT_VERSION_WITH_REVISION,
'target_repo': FORK_REPOSITORY,
'target_repo_token': None,
'target_tag': 'nightly',
'pypi_project': None,
'pypi_suffix': None,
@@ -216,7 +211,6 @@ def test_setup_variables():
'channel': f'{FORK_REPOSITORY}@master',
'version': DEFAULT_VERSION_WITH_REVISION,
'target_repo': FORK_REPOSITORY,
'target_repo_token': None,
'target_tag': 'master',
'pypi_project': None,
'pypi_suffix': None,
@@ -227,7 +221,6 @@ def test_setup_variables():
'channel': FORK_REPOSITORY,
'version': f'{DEFAULT_VERSION[:10]}.123',
'target_repo': FORK_REPOSITORY,
'target_repo_token': None,
'target_tag': f'{DEFAULT_VERSION[:10]}.123',
'pypi_project': None,
'pypi_suffix': None,
@@ -239,7 +232,6 @@ def test_setup_variables():
'channel': FORK_REPOSITORY,
'version': DEFAULT_VERSION,
'target_repo': FORK_REPOSITORY,
'target_repo_token': None,
'target_tag': DEFAULT_VERSION,
'pypi_project': None,
'pypi_suffix': None,
@@ -250,19 +242,16 @@ def test_setup_variables():
'channel': FORK_REPOSITORY,
'version': DEFAULT_VERSION_WITH_REVISION,
'target_repo': FORK_REPOSITORY,
'target_repo_token': None,
'target_tag': DEFAULT_VERSION_WITH_REVISION,
'pypi_project': None,
'pypi_suffix': None,
}, ignore_revision=True)
_test(
FORK_REPOSITORY, 'fork w/NIGHTLY_ARCHIVE_REPO_TOKEN, nightly', {
FORK_REPOSITORY, 'fork, nightly', {
'NIGHTLY_ARCHIVE_REPO': f'{FORK_ORG}/yt-dlp-nightly-builds',
'PYPI_PROJECT': 'yt-dlp-test',
}, {
'NIGHTLY_ARCHIVE_REPO_TOKEN': '1',
}, {
}, BASE_REPO_SECRETS, {
'source': f'{FORK_ORG}/yt-dlp-nightly-builds',
'target': 'nightly',
'prerelease': True,
@@ -270,19 +259,16 @@ def test_setup_variables():
'channel': f'{FORK_ORG}/yt-dlp-nightly-builds',
'version': DEFAULT_VERSION_WITH_REVISION,
'target_repo': f'{FORK_ORG}/yt-dlp-nightly-builds',
'target_repo_token': 'NIGHTLY_ARCHIVE_REPO_TOKEN',
'target_tag': DEFAULT_VERSION_WITH_REVISION,
'pypi_project': None,
'pypi_suffix': None,
}, ignore_revision=True)
_test(
FORK_REPOSITORY, 'fork w/MASTER_ARCHIVE_REPO_TOKEN, master', {
FORK_REPOSITORY, 'fork, master', {
'MASTER_ARCHIVE_REPO': f'{FORK_ORG}/yt-dlp-master-builds',
'MASTER_PYPI_PROJECT': 'yt-dlp-test',
'MASTER_PYPI_SUFFIX': 'dev',
}, {
'MASTER_ARCHIVE_REPO_TOKEN': '1',
}, {
}, BASE_REPO_SECRETS, {
'source': f'{FORK_ORG}/yt-dlp-master-builds',
'target': 'master',
'prerelease': True,
@@ -290,7 +276,6 @@ def test_setup_variables():
'channel': f'{FORK_ORG}/yt-dlp-master-builds',
'version': DEFAULT_VERSION_WITH_REVISION,
'target_repo': f'{FORK_ORG}/yt-dlp-master-builds',
'target_repo_token': 'MASTER_ARCHIVE_REPO_TOKEN',
'target_tag': DEFAULT_VERSION_WITH_REVISION,
'pypi_project': 'yt-dlp-test',
'pypi_suffix': 'dev',
@@ -302,7 +287,6 @@ def test_setup_variables():
'channel': f'{FORK_REPOSITORY}@experimental',
'version': DEFAULT_VERSION_WITH_REVISION,
'target_repo': FORK_REPOSITORY,
'target_repo_token': None,
'target_tag': 'experimental',
'pypi_project': None,
'pypi_suffix': None,
@@ -317,8 +301,15 @@ def test_setup_variables():
'channel': 'stable',
'version': DEFAULT_VERSION_WITH_REVISION,
'target_repo': FORK_REPOSITORY,
'target_repo_token': None,
'target_tag': 'experimental',
'pypi_project': None,
'pypi_suffix': None,
}, ignore_revision=True)
_test(
STABLE_REPOSITORY, 'official vars but no ARCHIVE_REPO_TOKEN, nightly',
BASE_REPO_VARS, {}, {
'source': 'nightly',
'target': 'nightly',
'prerelease': True,
}, None)

View File

@@ -261,9 +261,9 @@ def sanitize_got_info_dict(got_dict):
def expect_info_dict(self, got_dict, expected_dict):
ALLOWED_KEYS_SORT_ORDER = (
# NB: Keep in sync with the docstring of extractor/common.py
'id', 'ext', 'direct', 'display_id', 'title', 'alt_title', 'description', 'media_type',
'ie_key', 'url', 'id', 'ext', 'direct', 'display_id', 'title', 'alt_title', 'description', 'media_type',
'uploader', 'uploader_id', 'uploader_url', 'channel', 'channel_id', 'channel_url', 'channel_is_verified',
'channel_follower_count', 'comment_count', 'view_count', 'concurrent_view_count',
'channel_follower_count', 'comment_count', 'view_count', 'concurrent_view_count', 'save_count',
'like_count', 'dislike_count', 'repost_count', 'average_rating', 'age_limit', 'duration', 'thumbnail', 'heatmap',
'chapters', 'chapter', 'chapter_number', 'chapter_id', 'start_time', 'end_time', 'section_start', 'section_end',
'categories', 'tags', 'cast', 'composers', 'artists', 'album_artists', 'creators', 'genres',

View File

@@ -1,44 +0,0 @@
#!/usr/bin/env python3
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, is_download_test
from yt_dlp.extractor import IqiyiIE
class WarningLogger:
def __init__(self):
self.messages = []
def warning(self, msg):
self.messages.append(msg)
def debug(self, msg):
pass
def error(self, msg):
pass
@is_download_test
class TestIqiyiSDKInterpreter(unittest.TestCase):
def test_iqiyi_sdk_interpreter(self):
"""
Test the functionality of IqiyiSDKInterpreter by trying to log in
If `sign` is incorrect, /validate call throws an HTTP 556 error
"""
logger = WarningLogger()
ie = IqiyiIE(FakeYDL({'logger': logger}))
ie._perform_login('foo', 'bar')
self.assertTrue('unable to log in:' in logger.messages[0])
if __name__ == '__main__':
unittest.main()

View File

@@ -29,6 +29,11 @@ class TestMetadataFromField(unittest.TestCase):
MetadataParserPP.format_to_regex('%(title)s - %(artist)s'),
r'(?P<title>.+)\ \-\ (?P<artist>.+)')
self.assertEqual(MetadataParserPP.format_to_regex(r'(?P<x>.+)'), r'(?P<x>.+)')
self.assertEqual(MetadataParserPP.format_to_regex(r'text (?P<x>.+)'), r'text (?P<x>.+)')
self.assertEqual(MetadataParserPP.format_to_regex('x'), r'(?s)(?P<x>.+)')
self.assertEqual(MetadataParserPP.format_to_regex('Field_Name1'), r'(?s)(?P<Field_Name1>.+)')
self.assertEqual(MetadataParserPP.format_to_regex('é'), r'(?s)(?P<é>.+)')
self.assertEqual(MetadataParserPP.format_to_regex('invalid '), 'invalid ')
def test_field_to_template(self):
self.assertEqual(MetadataParserPP.field_to_template('title'), '%(title)s')

View File

@@ -489,6 +489,10 @@ class TestUtil(unittest.TestCase):
self.assertEqual(unified_timestamp('Wednesday 31 December 1969 18:01:26 MDT'), 86)
self.assertEqual(unified_timestamp('12/31/1969 20:01:18 EDT', False), 78)
self.assertEqual(unified_timestamp('2026-01-01 00:00:00', tz_offset=0), 1767225600)
self.assertEqual(unified_timestamp('2026-01-01 00:00:00', tz_offset=8), 1767196800)
self.assertEqual(unified_timestamp('2026-01-01 00:00:00 +0800', tz_offset=-5), 1767196800)
def test_determine_ext(self):
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
self.assertEqual(determine_ext('http://example.com/foo/bar/?download', None), None)
@@ -1276,6 +1280,9 @@ class TestUtil(unittest.TestCase):
on = js_to_json('[new Date("spam"), \'("eggs")\']')
self.assertEqual(json.loads(on), ['spam', '("eggs")'], msg='Date regex should match a single string')
on = js_to_json('[0.077, 7.06, 29.064, 169.0072]')
self.assertEqual(json.loads(on), [0.077, 7.06, 29.064, 169.0072])
def test_js_to_json_malformed(self):
self.assertEqual(js_to_json('42a1'), '42"a1"')
self.assertEqual(js_to_json('42a-1'), '42"a"-1')

View File

@@ -595,7 +595,7 @@ class YoutubeDL:
'width', 'height', 'asr', 'audio_channels', 'fps',
'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
'timestamp', 'release_timestamp', 'available_at',
'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count', 'save_count',
'average_rating', 'comment_count', 'age_limit',
'start_time', 'end_time',
'chapter_number', 'season_number', 'episode_number',
@@ -3026,6 +3026,10 @@ class YoutubeDL:
format_selector = self.format_selector
while True:
if interactive_format_selection:
if not formats:
# Bypass interactive format selection if no formats & --ignore-no-formats-error
formats_to_download = None
break
req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
+ '(Press ENTER for default, or Ctrl+C to quit)'
+ self._format_screen(': ', self.Styles.EMPHASIS))

View File

@@ -431,6 +431,7 @@ from .cpac import (
)
from .cracked import CrackedIE
from .craftsy import CraftsyIE
from .croatianfilm import CroatianFilmIE
from .crooksandliars import CrooksAndLiarsIE
from .crowdbunker import (
CrowdBunkerChannelIE,
@@ -638,6 +639,7 @@ from .fc2 import (
)
from .fczenit import FczenitIE
from .fifa import FifaIE
from .filmarchiv import FilmArchivIE
from .filmon import (
FilmOnChannelIE,
FilmOnIE,
@@ -1086,11 +1088,6 @@ from .mangomolo import (
MangomoloLiveIE,
MangomoloVideoIE,
)
from .manoto import (
ManotoTVIE,
ManotoTVLiveIE,
ManotoTVShowIE,
)
from .manyvids import ManyVidsIE
from .maoritv import MaoriTVIE
from .markiza import (
@@ -1278,6 +1275,7 @@ from .nebula import (
NebulaChannelIE,
NebulaClassIE,
NebulaIE,
NebulaSeasonIE,
NebulaSubscriptionsIE,
)
from .nekohacker import NekoHackerIE
@@ -1312,12 +1310,6 @@ from .newgrounds import (
)
from .newspicks import NewsPicksIE
from .newsy import NewsyIE
from .nextmedia import (
AppleDailyIE,
NextMediaActionNewsIE,
NextMediaIE,
NextTVIE,
)
from .nexx import (
NexxEmbedIE,
NexxIE,
@@ -1486,6 +1478,7 @@ from .palcomp3 import (
PalcoMP3IE,
PalcoMP3VideoIE,
)
from .pandatv import PandaTvIE
from .panopto import (
PanoptoIE,
PanoptoListIE,
@@ -1834,10 +1827,6 @@ from .scrippsnetworks import (
ScrippsNetworksWatchIE,
)
from .scrolller import ScrolllerIE
from .scte import (
SCTEIE,
SCTECourseIE,
)
from .sejmpl import SejmIE
from .sen import SenIE
from .senalcolombia import SenalColombiaLiveIE
@@ -2019,6 +2008,11 @@ from .taptap import (
TapTapMomentIE,
TapTapPostIntlIE,
)
from .tarangplus import (
TarangPlusEpisodesIE,
TarangPlusPlaylistIE,
TarangPlusVideoIE,
)
from .tass import TassIE
from .tbs import TBSIE
from .tbsjp import (


@@ -279,7 +279,7 @@ class ArchiveOrgIE(InfoExtractor):
'url': 'https://archive.org/' + track['file'].lstrip('/'),
}
metadata = self._download_json('http://archive.org/metadata/' + identifier, identifier)
metadata = self._download_json(f'https://archive.org/metadata/{identifier}', identifier)
m = metadata['metadata']
identifier = m['identifier']


@@ -5,16 +5,18 @@ import time
from .common import InfoExtractor
from ..utils import (
KNOWN_EXTENSIONS,
ExtractorError,
clean_html,
extract_attributes,
float_or_none,
format_field,
int_or_none,
join_nonempty,
parse_filesize,
parse_qs,
str_or_none,
strftime_or_none,
try_get,
unified_strdate,
unified_timestamp,
update_url_query,
url_or_none,
@@ -411,70 +413,67 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'Bandcamp:weekly'
_VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
_VALID_URL = r'https?://(?:www\.)?bandcamp\.com/radio/?\?(?:[^#]+&)?show=(?P<id>\d+)'
_TESTS = [{
'url': 'https://bandcamp.com/?show=224',
'url': 'https://bandcamp.com/radio?show=224',
'md5': '61acc9a002bed93986b91168aa3ab433',
'info_dict': {
'id': '224',
'ext': 'mp3',
'title': 'BC Weekly April 4th 2017 - Magic Moments',
'title': 'Bandcamp Weekly, 2017-04-04',
'description': 'md5:5d48150916e8e02d030623a48512c874',
'duration': 5829.77,
'release_date': '20170404',
'thumbnail': 'https://f4.bcbits.com/img/9982549_0.jpg',
'series': 'Bandcamp Weekly',
'episode': 'Magic Moments',
'episode_id': '224',
'release_timestamp': 1491264000,
'release_date': '20170404',
'duration': 5829.77,
},
'params': {
'format': 'mp3-128',
},
}, {
'url': 'https://bandcamp.com/?blah/blah@&show=228',
'url': 'https://bandcamp.com/radio/?foo=bar&show=224',
'only_matching': True,
}]
def _real_extract(self, url):
show_id = self._match_id(url)
webpage = self._download_webpage(url, show_id)
audio_data = self._download_json(
'https://bandcamp.com/api/bcradio_api/1/get_show',
show_id, 'Downloading radio show JSON',
data=json.dumps({'id': show_id}).encode(),
headers={'Content-Type': 'application/json'})['radioShowAudio']
blob = self._extract_data_attr(webpage, show_id, 'blob')
stream_url = audio_data['streamUrl']
format_id = traverse_obj(stream_url, ({parse_qs}, 'enc', -1))
encoding, _, bitrate_str = (format_id or '').partition('-')
show = blob['bcw_data'][show_id]
webpage = self._download_webpage(url, show_id, fatal=False)
metadata = traverse_obj(
self._extract_data_attr(webpage, show_id, 'blob', fatal=False),
('appData', 'shows', lambda _, v: str(v['showId']) == show_id, any)) or {}
formats = []
for format_id, format_url in show['audio_stream'].items():
if not url_or_none(format_url):
continue
for known_ext in KNOWN_EXTENSIONS:
if known_ext in format_id:
ext = known_ext
break
else:
ext = None
formats.append({
'format_id': format_id,
'url': format_url,
'ext': ext,
'vcodec': 'none',
})
title = show.get('audio_title') or 'Bandcamp Weekly'
subtitle = show.get('subtitle')
if subtitle:
title += f' - {subtitle}'
series_title = audio_data.get('title') or metadata.get('title')
release_timestamp = unified_timestamp(audio_data.get('date')) or unified_timestamp(metadata.get('date'))
return {
'id': show_id,
'title': title,
'description': show.get('desc') or show.get('short_desc'),
'duration': float_or_none(show.get('audio_duration')),
'is_live': False,
'release_date': unified_strdate(show.get('published_date')),
'series': 'Bandcamp Weekly',
'episode': show.get('subtitle'),
'episode_id': show_id,
'formats': formats,
'title': join_nonempty(series_title, strftime_or_none(release_timestamp, '%Y-%m-%d'), delim=', '),
'series': series_title,
'thumbnail': format_field(metadata, 'imageId', 'https://f4.bcbits.com/img/%s_0.jpg', default=None),
'description': metadata.get('desc') or metadata.get('short_desc'),
'duration': float_or_none(audio_data.get('duration')),
'release_timestamp': release_timestamp,
'formats': [{
'url': stream_url,
'format_id': format_id,
'ext': encoding or 'mp3',
'acodec': encoding or None,
'vcodec': 'none',
'abr': int_or_none(bitrate_str),
}],
}
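# Illustrative sketch of the `enc` handling above, assuming a stream URL
# of roughly this shape (hypothetical, not captured from the API):
#   stream_url = 'https://example.bcbits.com/stream/abc?enc=mp3-128'
#   traverse_obj(stream_url, ({parse_qs}, 'enc', -1))  # -> 'mp3-128'
#   'mp3-128'.partition('-')                           # -> ('mp3', '-', '128')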


@@ -1,5 +1,5 @@
from .common import InfoExtractor
from ..utils import ExtractorError, urlencode_postdata
from ..utils import ExtractorError, UserNotLive, urlencode_postdata
class BigoIE(InfoExtractor):
@@ -40,7 +40,7 @@ class BigoIE(InfoExtractor):
info = info_raw.get('data') or {}
if not info.get('alive'):
raise ExtractorError('This user is offline.', expected=True)
raise UserNotLive(video_id=user_id)
formats, subs = self._extract_m3u8_formats_and_subtitles(
info.get('hls_src'), user_id, 'mp4', 'm3u8')


@@ -27,7 +27,7 @@ from ..utils.traversal import traverse_obj
class CDAIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
_VALID_URL = r'https?://(?:(?:(?:www|m)\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
_NETRC_MACHINE = 'cdapl'
_BASE_URL = 'https://www.cda.pl'
@@ -110,6 +110,9 @@ class CDAIE(InfoExtractor):
}, {
'url': 'http://ebd.cda.pl/0x0/5749950c',
'only_matching': True,
}, {
'url': 'https://m.cda.pl/video/617297677',
'only_matching': True,
}]
def _download_age_confirm_page(self, url, video_id, *args, **kwargs):
@@ -367,35 +370,35 @@ class CDAIE(InfoExtractor):
class CDAFolderIE(InfoExtractor):
_MAX_PAGE_SIZE = 36
_VALID_URL = r'https?://(?:www\.)?cda\.pl/(?P<channel>[\w-]+)/folder/(?P<id>\d+)'
_TESTS = [
{
'url': 'https://www.cda.pl/domino264/folder/31188385',
'info_dict': {
'id': '31188385',
'title': 'SERIA DRUGA',
},
'playlist_mincount': 13,
_VALID_URL = r'https?://(?:(?:www|m)\.)?cda\.pl/(?P<channel>[\w-]+)/folder/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.cda.pl/domino264/folder/31188385',
'info_dict': {
'id': '31188385',
'title': 'SERIA DRUGA',
},
{
'url': 'https://www.cda.pl/smiechawaTV/folder/2664592/vfilm',
'info_dict': {
'id': '2664592',
'title': 'VideoDowcipy - wszystkie odcinki',
},
'playlist_mincount': 71,
'playlist_mincount': 13,
}, {
'url': 'https://www.cda.pl/smiechawaTV/folder/2664592/vfilm',
'info_dict': {
'id': '2664592',
'title': 'VideoDowcipy - wszystkie odcinki',
},
{
'url': 'https://www.cda.pl/DeliciousBeauty/folder/19129979/vfilm',
'info_dict': {
'id': '19129979',
'title': 'TESTY KOSMETYKÓW',
},
'playlist_mincount': 139,
}, {
'url': 'https://www.cda.pl/FILMY-SERIALE-ANIME-KRESKOWKI-BAJKI/folder/18493422',
'only_matching': True,
}]
'playlist_mincount': 71,
}, {
'url': 'https://www.cda.pl/DeliciousBeauty/folder/19129979/vfilm',
'info_dict': {
'id': '19129979',
'title': 'TESTY KOSMETYKÓW',
},
'playlist_mincount': 139,
}, {
'url': 'https://www.cda.pl/FILMY-SERIALE-ANIME-KRESKOWKI-BAJKI/folder/18493422',
'only_matching': True,
}, {
'url': 'https://m.cda.pl/smiechawaTV/folder/2664592/vfilm',
'only_matching': True,
}]
def _real_extract(self, url):
folder_id, channel = self._match_valid_url(url).group('id', 'channel')


@@ -348,6 +348,7 @@ class InfoExtractor:
duration: Length of the video in seconds, as an integer or float.
view_count: How many users have watched the video on the platform.
concurrent_view_count: How many users are currently watching the video on the platform.
save_count: Number of times the video has been saved or bookmarked
like_count: Number of positive ratings of the video
dislike_count: Number of negative ratings of the video
repost_count: Number of reposts of the video


@@ -0,0 +1,79 @@
from .common import InfoExtractor
from .vimeo import VimeoIE
from ..utils import (
ExtractorError,
join_nonempty,
)
from ..utils.traversal import traverse_obj
class CroatianFilmIE(InfoExtractor):
IE_NAME = 'croatian.film'
_VALID_URL = r'https?://(?:www\.)?croatian\.film/[a-z]{2}/[^/?#]+/(?P<id>\d+)'
_GEO_COUNTRIES = ['HR']
_TESTS = [{
'url': 'https://www.croatian.film/hr/films/72472',
'info_dict': {
'id': '1078340774',
'ext': 'mp4',
'title': '“ŠKAFETIN”, r. Paško Vukasović',
'uploader': 'croatian.film',
'uploader_id': 'user94192658',
'uploader_url': 'https://vimeo.com/user94192658',
'duration': 1357,
'thumbnail': 'https://i.vimeocdn.com/video/2008556407-40eb1315ec11be5fcb8dda4d7059675b0881e182b9fc730892e267db72cb57f5-d',
},
'params': {'skip_download': 'm3u8'},
'expected_warnings': ['Failed to parse XML: not well-formed'],
}, {
# geo-restricted but works with xff
'url': 'https://www.croatian.film/en/films/77144',
'info_dict': {
'id': '1144997795',
'ext': 'mp4',
'title': '“ROKO” r. Ivana Marinić Kragić',
'uploader': 'croatian.film',
'uploader_id': 'user94192658',
'uploader_url': 'https://vimeo.com/user94192658',
'duration': 1023,
'thumbnail': 'https://i.vimeocdn.com/video/2093793231-11c2928698ff8347489e679b4d563a576e7acd0681ce95b383a9a25f6adb5e8f-d',
},
'params': {'skip_download': 'm3u8'},
'expected_warnings': ['Failed to parse XML: not well-formed'],
}, {
'url': 'https://www.croatian.film/en/films/75904/watch',
'info_dict': {
'id': '1134883757',
'ext': 'mp4',
'title': '"CARPE DIEM" r. Nina Damjanović',
'uploader': 'croatian.film',
'uploader_id': 'user94192658',
'uploader_url': 'https://vimeo.com/user94192658',
'duration': 1123,
'thumbnail': 'https://i.vimeocdn.com/video/2080022187-bb691c470c28c4d979258cf235e594bf9a11c14b837a0784326c25c95edd83f9-d',
},
'params': {'skip_download': 'm3u8'},
'expected_warnings': ['Failed to parse XML: not well-formed'],
}]
def _real_extract(self, url):
display_id = self._match_id(url)
api_data = self._download_json(
f'https://api.croatian.film/api/videos/{display_id}',
display_id)
if errors := traverse_obj(api_data, ('errors', lambda _, v: v['code'])):
codes = traverse_obj(errors, (..., 'code', {str}))
if 'INVALID_COUNTRY' in codes:
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
raise ExtractorError(join_nonempty(
*(traverse_obj(errors, (..., 'details', {str})) or codes),
delim='; '))
vimeo_id = self._search_regex(
r'/videos/(\d+)', api_data['video']['vimeoURL'], 'vimeo ID')
return self.url_result(
VimeoIE._smuggle_referrer(f'https://player.vimeo.com/video/{vimeo_id}', url),
VimeoIE, vimeo_id)
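# Hypothetical payload shape assumed by the handling above (not captured
# from the live API): errors arrive as
#   {"errors": [{"code": "INVALID_COUNTRY", "details": "..."}]}
# while on success api_data['video']['vimeoURL'] contains a
# '/videos/<digits>' path from which the Vimeo ID is taken.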


@@ -14,7 +14,7 @@ from ..utils import (
class DropboxIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dropbox\.com/(?:(?:e/)?scl/fi|sh?)/(?P<id>\w+)'
_VALID_URL = r'https?://(?:www\.)?dropbox\.com/(?:(?:e/)?scl/f[io]|sh?)/(?P<id>\w+)'
_TESTS = [
{
'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',
@@ -35,6 +35,9 @@ class DropboxIE(InfoExtractor):
}, {
'url': 'https://www.dropbox.com/e/scl/fi/r2kd2skcy5ylbbta5y1pz/DJI_0003.MP4?dl=0&rlkey=wcdgqangn7t3lnmmv6li9mu9h',
'only_matching': True,
}, {
'url': 'https://www.dropbox.com/scl/fo/zjfqse5txqfd7twa8iewj/AOfZzSYWUSKle2HD7XF7kzQ/A-BEAT%20C.mp4?rlkey=6tg3jkp4tv6a5vt58a6dag0mm&dl=0',
'only_matching': True,
},
]


@@ -4,8 +4,6 @@ import urllib.parse
from .common import InfoExtractor
from ..compat import compat_etree_fromstring
from ..networking import Request
from ..networking.exceptions import network_exceptions
from ..utils import (
ExtractorError,
clean_html,
@@ -64,9 +62,6 @@ class FacebookIE(InfoExtractor):
class=(?P<q1>[\'"])[^\'"]*\bfb-(?:video|post)\b[^\'"]*(?P=q1)[^>]+
data-href=(?P<q2>[\'"])(?P<url>(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''',
]
_LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
_CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
_NETRC_MACHINE = 'facebook'
IE_NAME = 'facebook'
_VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
@@ -469,65 +464,6 @@ class FacebookIE(InfoExtractor):
'graphURI': '/api/graphql/',
}
def _perform_login(self, username, password):
login_page_req = Request(self._LOGIN_URL)
self._set_cookie('facebook.com', 'locale', 'en_US')
login_page = self._download_webpage(login_page_req, None,
note='Downloading login page',
errnote='Unable to download login page')
lsd = self._search_regex(
r'<input type="hidden" name="lsd" value="([^"]*)"',
login_page, 'lsd')
lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, 'lgnrnd')
login_form = {
'email': username,
'pass': password,
'lsd': lsd,
'lgnrnd': lgnrnd,
'next': 'http://facebook.com/home.php',
'default_persistent': '0',
'legacy_return': '1',
'timezone': '-60',
'trynum': '1',
}
request = Request(self._LOGIN_URL, urlencode_postdata(login_form))
request.headers['Content-Type'] = 'application/x-www-form-urlencoded'
try:
login_results = self._download_webpage(request, None,
note='Logging in', errnote='unable to fetch login page')
if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
error = self._html_search_regex(
r'(?s)<div[^>]+class=(["\']).*?login_error_box.*?\1[^>]*><div[^>]*>.*?</div><div[^>]*>(?P<error>.+?)</div>',
login_results, 'login error', default=None, group='error')
if error:
raise ExtractorError(f'Unable to login: {error}', expected=True)
self.report_warning('unable to log in: bad username/password, or exceeded login rate limit (~3/min). Check credentials or wait.')
return
fb_dtsg = self._search_regex(
r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg', default=None)
h = self._search_regex(
r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h', default=None)
if not fb_dtsg or not h:
return
check_form = {
'fb_dtsg': fb_dtsg,
'h': h,
'name_action_selected': 'dont_save',
}
check_req = Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
check_req.headers['Content-Type'] = 'application/x-www-form-urlencoded'
check_response = self._download_webpage(check_req, None,
note='Confirming login')
if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
self.report_warning('Unable to confirm login, you have to login in your browser and authorize the login.')
except network_exceptions as err:
self.report_warning(f'unable to log in: {err}')
return
def _extract_from_url(self, url, video_id):
webpage = self._download_webpage(
url.replace('://m.facebook.com/', '://www.facebook.com/'), video_id)


@@ -0,0 +1,52 @@
from .common import InfoExtractor
from ..utils import clean_html
from ..utils.traversal import (
find_element,
find_elements,
traverse_obj,
)
class FilmArchivIE(InfoExtractor):
IE_DESC = 'FILMARCHIV ON'
_VALID_URL = r'https?://(?:www\.)?filmarchiv\.at/de/filmarchiv-on/video/(?P<id>f_[0-9a-zA-Z]{5,})'
_TESTS = [{
'url': 'https://www.filmarchiv.at/de/filmarchiv-on/video/f_0305p7xKrXUPBwoNE9x6mh',
'md5': '54a6596f6a84624531866008a77fa27a',
'info_dict': {
'id': 'f_0305p7xKrXUPBwoNE9x6mh',
'ext': 'mp4',
'title': 'Der Wurstelprater zur Kaiserzeit',
'description': 'md5:9843f92df5cc9a4975cee7aabcf6e3b2',
'thumbnail': r're:https://cdn\.filmarchiv\.at/f_0305/p7xKrXUPBwoNE9x6mh_v1/poster\.jpg',
},
}, {
'url': 'https://www.filmarchiv.at/de/filmarchiv-on/video/f_0306vI3wO0tJIsfrqYFQXF',
'md5': '595385d7f54cb6529140ee8de7d1c3c7',
'info_dict': {
'id': 'f_0306vI3wO0tJIsfrqYFQXF',
'ext': 'mp4',
'title': 'Vor 70 Jahren: Wettgehen der Briefträger in Wien',
'description': 'md5:b2a2e4230923cd1969d471c552e62811',
'thumbnail': r're:https://cdn\.filmarchiv\.at/f_0306/vI3wO0tJIsfrqYFQXF_v1/poster\.jpg',
},
}]
def _real_extract(self, url):
media_id = self._match_id(url)
webpage = self._download_webpage(url, media_id)
path = '/'.join((media_id[:6], media_id[6:]))
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
f'https://cdn.filmarchiv.at/{path}_v1_sv1/playlist.m3u8', media_id)
return {
'id': media_id,
'title': traverse_obj(webpage, ({find_element(tag='title-div')}, {clean_html})),
'description': traverse_obj(webpage, (
{find_elements(tag='div', attr='class', value=r'.*\bborder-base-content\b', regex=True)}, ...,
{find_elements(tag='div', attr='class', value=r'.*\bprose\b', html=False, regex=True)}, ...,
{clean_html}, any)),
'thumbnail': f'https://cdn.filmarchiv.at/{path}_v1/poster.jpg',
'formats': formats,
'subtitles': subtitles,
}
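# Worked example from the first test above: the media_id
# 'f_0305p7xKrXUPBwoNE9x6mh' splits after six characters into
# 'f_0305/p7xKrXUPBwoNE9x6mh', producing
# https://cdn.filmarchiv.at/f_0305/p7xKrXUPBwoNE9x6mh_v1_sv1/playlist.m3u8
# and the matching .../f_0305/p7xKrXUPBwoNE9x6mh_v1/poster.jpg thumbnail.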


@@ -821,13 +821,17 @@ class GenericIE(InfoExtractor):
'Referer': smuggled_data.get('referer'),
}), impersonate=impersonate)
except ExtractorError as e:
if not (isinstance(e.cause, HTTPError) and e.cause.status == 403
and e.cause.response.get_header('cf-mitigated') == 'challenge'
and e.cause.response.extensions.get('impersonate') is None):
if not isinstance(e.cause, HTTPError) or e.cause.status != 403:
raise
res = e.cause.response
already_impersonating = res.extensions.get('impersonate') is not None
if already_impersonating or (
res.get_header('cf-mitigated') != 'challenge'
and b'<title>Attention Required! | Cloudflare</title>' not in res.read()
):
raise
cf_cookie_domain = traverse_obj(
LenientSimpleCookie(e.cause.response.get_header('set-cookie')),
('__cf_bm', 'domain'))
LenientSimpleCookie(res.get_header('set-cookie')), ('__cf_bm', 'domain'))
if cf_cookie_domain:
self.write_debug(f'Clearing __cf_bm cookie for {cf_cookie_domain}')
self.cookiejar.clear(domain=cf_cookie_domain, path='/', name='__cf_bm')
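# In short: a 403 is retried with impersonation only when the request was
# not already impersonating and the response looks like a Cloudflare
# challenge (cf-mitigated header or the "Attention Required!" title).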


@@ -46,6 +46,7 @@ class GofileIE(InfoExtractor):
'videopassword': 'password',
},
}]
_STATIC_TOKEN = '4fd6sg89d7s6' # From https://gofile.io/dist/js/config.js
_TOKEN = None
def _real_initialize(self):
@@ -60,13 +61,16 @@ class GofileIE(InfoExtractor):
self._set_cookie('.gofile.io', 'accountToken', self._TOKEN)
def _entries(self, file_id):
query_params = {'wt': '4fd6sg89d7s6'} # From https://gofile.io/dist/js/alljs.js
password = self.get_param('videopassword')
if password:
query_params = {}
if password := self.get_param('videopassword'):
query_params['password'] = hashlib.sha256(password.encode()).hexdigest()
files = self._download_json(
f'https://api.gofile.io/contents/{file_id}', file_id, 'Getting filelist',
query=query_params, headers={'Authorization': f'Bearer {self._TOKEN}'})
query=query_params, headers={
'Authorization': f'Bearer {self._TOKEN}',
'X-Website-Token': self._STATIC_TOKEN,
})
status = files['status']
if status == 'error-passwordRequired':


@@ -27,7 +27,7 @@ class HotStarBaseIE(InfoExtractor):
_TOKEN_NAME = 'userUP'
_BASE_URL = 'https://www.hotstar.com'
_API_URL = 'https://api.hotstar.com'
_API_URL_V2 = 'https://apix.hotstar.com/v2'
_API_URL_V2 = 'https://www.hotstar.com/api/internal/bff/v2'
_AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee'
_FREE_HEADERS = {


@@ -9,14 +9,12 @@ from .openload import PhantomJSwrapper
from ..utils import (
ExtractorError,
clean_html,
decode_packed_codes,
float_or_none,
format_field,
get_element_by_attribute,
get_element_by_id,
int_or_none,
js_to_json,
ohdave_rsa_encrypt,
parse_age_limit,
parse_duration,
parse_iso8601,
@@ -33,143 +31,12 @@ def md5_text(text):
return hashlib.md5(text.encode()).hexdigest()
class IqiyiSDK:
def __init__(self, target, ip, timestamp):
self.target = target
self.ip = ip
self.timestamp = timestamp
@staticmethod
def split_sum(data):
return str(sum(int(p, 16) for p in data))
@staticmethod
def digit_sum(num):
if isinstance(num, int):
num = str(num)
return str(sum(map(int, num)))
def even_odd(self):
even = self.digit_sum(str(self.timestamp)[::2])
odd = self.digit_sum(str(self.timestamp)[1::2])
return even, odd
def preprocess(self, chunksize):
self.target = md5_text(self.target)
chunks = []
for i in range(32 // chunksize):
chunks.append(self.target[chunksize * i:chunksize * (i + 1)])
if 32 % chunksize:
chunks.append(self.target[32 - 32 % chunksize:])
return chunks, list(map(int, self.ip.split('.')))
def mod(self, modulus):
chunks, ip = self.preprocess(32)
self.target = chunks[0] + ''.join(str(p % modulus) for p in ip)
def split(self, chunksize):
modulus_map = {
4: 256,
5: 10,
8: 100,
}
chunks, ip = self.preprocess(chunksize)
ret = ''
for i in range(len(chunks)):
ip_part = str(ip[i] % modulus_map[chunksize]) if i < 4 else ''
if chunksize == 8:
ret += ip_part + chunks[i]
else:
ret += chunks[i] + ip_part
self.target = ret
def handle_input16(self):
self.target = md5_text(self.target)
self.target = self.split_sum(self.target[:16]) + self.target + self.split_sum(self.target[16:])
def handle_input8(self):
self.target = md5_text(self.target)
ret = ''
for i in range(4):
part = self.target[8 * i:8 * (i + 1)]
ret += self.split_sum(part) + part
self.target = ret
def handleSum(self):
self.target = md5_text(self.target)
self.target = self.split_sum(self.target) + self.target
def date(self, scheme):
self.target = md5_text(self.target)
d = time.localtime(self.timestamp)
strings = {
'y': str(d.tm_year),
'm': '%02d' % d.tm_mon,
'd': '%02d' % d.tm_mday,
}
self.target += ''.join(strings[c] for c in scheme)
def split_time_even_odd(self):
even, odd = self.even_odd()
self.target = odd + md5_text(self.target) + even
def split_time_odd_even(self):
even, odd = self.even_odd()
self.target = even + md5_text(self.target) + odd
def split_ip_time_sum(self):
chunks, ip = self.preprocess(32)
self.target = str(sum(ip)) + chunks[0] + self.digit_sum(self.timestamp)
def split_time_ip_sum(self):
chunks, ip = self.preprocess(32)
self.target = self.digit_sum(self.timestamp) + chunks[0] + str(sum(ip))
class IqiyiSDKInterpreter:
def __init__(self, sdk_code):
self.sdk_code = sdk_code
def run(self, target, ip, timestamp):
self.sdk_code = decode_packed_codes(self.sdk_code)
functions = re.findall(r'input=([a-zA-Z0-9]+)\(input', self.sdk_code)
sdk = IqiyiSDK(target, ip, timestamp)
other_functions = {
'handleSum': sdk.handleSum,
'handleInput8': sdk.handle_input8,
'handleInput16': sdk.handle_input16,
'splitTimeEvenOdd': sdk.split_time_even_odd,
'splitTimeOddEven': sdk.split_time_odd_even,
'splitIpTimeSum': sdk.split_ip_time_sum,
'splitTimeIpSum': sdk.split_time_ip_sum,
}
for function in functions:
if re.match(r'mod\d+', function):
sdk.mod(int(function[3:]))
elif re.match(r'date[ymd]{3}', function):
sdk.date(function[4:])
elif re.match(r'split\d+', function):
sdk.split(int(function[5:]))
elif function in other_functions:
other_functions[function]()
else:
raise ExtractorError(f'Unknown function {function}')
return sdk.target
class IqiyiIE(InfoExtractor):
IE_NAME = 'iqiyi'
IE_DESC = '爱奇艺'
_VALID_URL = r'https?://(?:(?:[^.]+\.)?iqiyi\.com|www\.pps\.tv)/.+\.html'
_NETRC_MACHINE = 'iqiyi'
_TESTS = [{
'url': 'http://www.iqiyi.com/v_19rrojlavg.html',
# MD5 checksum differs on my machine and Travis CI
@@ -234,57 +101,6 @@ class IqiyiIE(InfoExtractor):
'18': 7, # 1080p
}
@staticmethod
def _rsa_fun(data):
# public key extracted from http://static.iqiyi.com/js/qiyiV2/20160129180840/jobs/i18n/i18nIndex.js
N = 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd
e = 65537
return ohdave_rsa_encrypt(data, e, N)
def _perform_login(self, username, password):
data = self._download_json(
'http://kylin.iqiyi.com/get_token', None,
note='Get token for logging', errnote='Unable to get token for logging')
sdk = data['sdk']
timestamp = int(time.time())
target = (
f'/apis/reglogin/login.action?lang=zh_TW&area_code=null&email={username}'
f'&passwd={self._rsa_fun(password.encode())}&agenttype=1&from=undefined&keeplogin=0&piccode=&fromurl=&_pos=1')
interp = IqiyiSDKInterpreter(sdk)
sign = interp.run(target, data['ip'], timestamp)
validation_params = {
'target': target,
'server': 'BEA3AA1908656AABCCFF76582C4C6660',
'token': data['token'],
'bird_src': 'f8d91d57af224da7893dd397d52d811a',
'sign': sign,
'bird_t': timestamp,
}
validation_result = self._download_json(
'http://kylin.iqiyi.com/validate?' + urllib.parse.urlencode(validation_params), None,
note='Validate credentials', errnote='Unable to validate credentials')
MSG_MAP = {
'P00107': 'please login via the web interface and enter the CAPTCHA code',
'P00117': 'bad username or password',
}
code = validation_result['code']
if code != 'A00000':
msg = MSG_MAP.get(code)
if not msg:
msg = f'error {code}'
if validation_result.get('msg'):
msg += ': ' + validation_result['msg']
self.report_warning('unable to log in: ' + msg)
return False
return True
def get_raw_data(self, tvid, video_id):
tm = int(time.time() * 1000)


@@ -1,128 +0,0 @@
from .common import InfoExtractor
from ..utils import clean_html, int_or_none, traverse_obj
_API_URL = 'https://dak1vd5vmi7x6.cloudfront.net/api/v1/publicrole/{}/{}?id={}'
class ManotoTVIE(InfoExtractor):
IE_DESC = 'Manoto TV (Episode)'
_VALID_URL = r'https?://(?:www\.)?manototv\.com/episode/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://www.manototv.com/episode/8475',
'info_dict': {
'id': '8475',
'series': 'خانه های رویایی با برادران اسکات',
'season_number': 7,
'episode_number': 25,
'episode_id': 'My Dream Home S7: Carol & John',
'duration': 3600,
'categories': ['سرگرمی'],
'title': 'کارول و جان',
'description': 'md5:d0fff1f8ba5c6775d312a00165d1a97e',
'thumbnail': r're:^https?://.*\.(jpeg|png|jpg)$',
'ext': 'mp4',
},
'params': {
'skip_download': 'm3u8',
},
}, {
'url': 'https://www.manototv.com/episode/12576',
'info_dict': {
'id': '12576',
'series': 'فیلم های ایرانی',
'episode_id': 'Seh Mah Taatili',
'duration': 5400,
'view_count': int,
'categories': ['سرگرمی'],
'title': 'سه ماه تعطیلی',
'description': 'سه ماه تعطیلی فیلمی به کارگردانی و نویسندگی شاپور قریب ساختهٔ سال ۱۳۵۶ است.',
'thumbnail': r're:^https?://.*\.(jpeg|png|jpg)$',
'ext': 'mp4',
},
'params': {
'skip_download': 'm3u8',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
episode_json = self._download_json(_API_URL.format('showmodule', 'episodedetails', video_id), video_id)
details = episode_json.get('details', {})
formats = self._extract_m3u8_formats(details.get('videoM3u8Url'), video_id, 'mp4')
return {
'id': video_id,
'series': details.get('showTitle'),
'season_number': int_or_none(details.get('analyticsSeasonNumber')),
'episode_number': int_or_none(details.get('episodeNumber')),
'episode_id': details.get('analyticsEpisodeTitle'),
'duration': int_or_none(details.get('durationInMinutes'), invscale=60),
'view_count': details.get('viewCount'),
'categories': [details.get('videoCategory')],
'title': details.get('episodeTitle'),
'description': clean_html(details.get('episodeDescription')),
'thumbnail': details.get('episodelandscapeImgIxUrl'),
'formats': formats,
}
class ManotoTVShowIE(InfoExtractor):
IE_DESC = 'Manoto TV (Show)'
_VALID_URL = r'https?://(?:www\.)?manototv\.com/show/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://www.manototv.com/show/2526',
'playlist_mincount': 68,
'info_dict': {
'id': '2526',
'title': 'فیلم های ایرانی',
'description': 'مجموعه ای از فیلم های سینمای کلاسیک ایران',
},
}]
def _real_extract(self, url):
show_id = self._match_id(url)
show_json = self._download_json(_API_URL.format('showmodule', 'details', show_id), show_id)
show_details = show_json.get('details', {})
title = show_details.get('showTitle')
description = show_details.get('showSynopsis')
series_json = self._download_json(_API_URL.format('showmodule', 'serieslist', show_id), show_id)
playlist_id = str(traverse_obj(series_json, ('details', 'list', 0, 'id')))
playlist_json = self._download_json(_API_URL.format('showmodule', 'episodelist', playlist_id), playlist_id)
playlist = traverse_obj(playlist_json, ('details', 'list')) or []
entries = [
self.url_result(
'https://www.manototv.com/episode/{}'.format(item['slideID']), ie=ManotoTVIE.ie_key(), video_id=item['slideID'])
for item in playlist]
return self.playlist_result(entries, show_id, title, description)
class ManotoTVLiveIE(InfoExtractor):
IE_DESC = 'Manoto TV (Live)'
_VALID_URL = r'https?://(?:www\.)?manototv\.com/live/'
_TEST = {
'url': 'https://www.manototv.com/live/',
'info_dict': {
'id': 'live',
'title': 'Manoto TV Live',
'ext': 'mp4',
'is_live': True,
},
'params': {
'skip_download': 'm3u8',
},
}
def _real_extract(self, url):
video_id = 'live'
json = self._download_json(_API_URL.format('livemodule', 'details', ''), video_id)
details = json.get('details', {})
video_url = details.get('liveUrl')
formats = self._extract_m3u8_formats(video_url, video_id, 'mp4', live=True)
return {
'id': video_id,
'title': 'Manoto TV Live',
'is_live': True,
'formats': formats,
}


@@ -478,3 +478,64 @@ class NebulaChannelIE(NebulaBaseIE):
playlist_id=collection_slug,
playlist_title=channel.get('title'),
playlist_description=channel.get('description'))
class NebulaSeasonIE(NebulaBaseIE):
IE_NAME = 'nebula:season'
_VALID_URL = rf'{_BASE_URL_RE}/(?P<series>[\w-]+)/season/(?P<season_number>[\w-]+)'
_TESTS = [{
'url': 'https://nebula.tv/jetlag/season/15',
'info_dict': {
'id': 'jetlag_15',
'title': 'Tag: All Stars',
'description': 'md5:5aa5b8abf3de71756448dc44ffebb674',
},
'playlist_count': 8,
}, {
'url': 'https://nebula.tv/jetlag/season/14',
'info_dict': {
'id': 'jetlag_14',
'title': 'Snake',
'description': 'md5:6da9040f1c2ac559579738bfb6919d1e',
},
'playlist_count': 8,
}, {
'url': 'https://nebula.tv/jetlag/season/13-5',
'info_dict': {
'id': 'jetlag_13-5',
'title': 'Hide + Seek Across NYC',
'description': 'md5:5b87bb9acc6dcdff289bb4c71a2ad59f',
},
'playlist_count': 3,
}]
def _build_url_result(self, item):
url = (
traverse_obj(item, ('share_url', {url_or_none}))
or urljoin('https://nebula.tv/', item.get('app_path'))
or f'https://nebula.tv/videos/{item["slug"]}')
return self.url_result(
smuggle_url(url, {'id': item['id']}),
NebulaIE, url_transparent=True,
**self._extract_video_metadata(item))
def _entries(self, data):
for episode in traverse_obj(data, ('episodes', lambda _, v: v['video']['id'], 'video')):
yield self._build_url_result(episode)
for extra in traverse_obj(data, ('extras', ..., 'items', lambda _, v: v['id'])):
yield self._build_url_result(extra)
for trailer in traverse_obj(data, ('trailers', lambda _, v: v['id'])):
yield self._build_url_result(trailer)
def _real_extract(self, url):
series, season_id = self._match_valid_url(url).group('series', 'season_number')
playlist_id = f'{series}_{season_id}'
data = self._call_api(
f'https://content.api.nebula.app/content/{series}/season/{season_id}', playlist_id)
return self.playlist_result(
self._entries(data), playlist_id,
**traverse_obj(data, {
'title': ('title', {str}),
'description': ('description', {str}),
}))
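# The _entries() traversal implies a season payload shaped roughly like
# (hypothetical sketch, not captured from the API):
#   {"title": ..., "description": ...,
#    "episodes": [{"video": {"id": ..., "slug": ..., "share_url": ...}}],
#    "extras": [{"items": [{"id": ...}]}],
#    "trailers": [{"id": ...}]}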


@@ -528,7 +528,7 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
IE_NAME = 'netease:program'
IE_DESC = '网易云音乐 - 电台节目'
_VALID_URL = r'https?://music\.163\.com/(?:#/)?program\?id=(?P<id>[0-9]+)'
_VALID_URL = r'https?://music\.163\.com/(?:#/)?(?:dj|program)\?id=(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://music.163.com/#/program?id=10109055',
'info_dict': {
@@ -572,6 +572,9 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
'params': {
'noplaylist': True,
},
}, {
'url': 'https://music.163.com/#/dj?id=3706179315',
'only_matching': True,
}]
def _real_extract(self, url):


@@ -2,84 +2,59 @@ from .common import InfoExtractor
from ..utils import (
clean_html,
int_or_none,
js_to_json,
parse_iso8601,
url_or_none,
urljoin,
)
from ..utils.traversal import traverse_obj
class NetzkinoIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?netzkino\.de/\#!/[^/]+/(?P<id>[^/]+)'
_GEO_COUNTRIES = ['DE']
_VALID_URL = r'https?://(?:www\.)?netzkino\.de/details/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.netzkino.de/#!/scifikino/rakete-zum-mond',
'md5': '92a3f8b76f8d7220acce5377ea5d4873',
'url': 'https://www.netzkino.de/details/snow-beast',
'md5': '1a4c90fe40d3ccabce163287e45e56dd',
'info_dict': {
'id': 'rakete-zum-mond',
'id': 'snow-beast',
'ext': 'mp4',
'title': 'Rakete zum Mond \u2013 Jules Verne',
'description': 'md5:f0a8024479618ddbfa450ff48ffa6c60',
'upload_date': '20120813',
'thumbnail': r're:https?://.*\.jpg$',
'timestamp': 1344858571,
'title': 'Snow Beast',
'age_limit': 12,
},
'params': {
'skip_download': 'Download only works from Germany',
},
}, {
'url': 'https://www.netzkino.de/#!/filme/dr-jekyll-mrs-hyde-2',
'md5': 'c7728b2dadd04ff6727814847a51ef03',
'info_dict': {
'id': 'dr-jekyll-mrs-hyde-2',
'ext': 'mp4',
'title': 'Dr. Jekyll & Mrs. Hyde 2',
'description': 'md5:c2e9626ebd02de0a794b95407045d186',
'upload_date': '20190130',
'thumbnail': r're:https?://.*\.jpg$',
'timestamp': 1548849437,
'age_limit': 18,
},
'params': {
'skip_download': 'Download only works from Germany',
'alt_title': 'Snow Beast',
'cast': 'count:3',
'categories': 'count:7',
'creators': 'count:2',
'description': 'md5:e604a954a7f827a80e96a3a97d48b269',
'location': 'US',
'release_year': 2011,
'thumbnail': r're:https?://.+\.jpg',
},
}]
def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
next_js_data = self._search_nextjs_data(webpage, video_id)
api_url = f'https://api.netzkino.de.simplecache.net/capi-2.0a/movies/{video_id}.json?d=www'
info = self._download_json(api_url, video_id)
custom_fields = info['custom_fields']
production_js = self._download_webpage(
'http://www.netzkino.de/beta/dist/production.min.js', video_id,
note='Downloading player code')
avo_js = self._search_regex(
r'var urlTemplate=(\{.*?"\})',
production_js, 'URL templates')
templates = self._parse_json(
avo_js, video_id, transform_source=js_to_json)
suffix = {
'hds': '.mp4/manifest.f4m',
'hls': '.mp4/master.m3u8',
'pmd': '.mp4',
}
film_fn = custom_fields['Streaming'][0]
formats = [{
'format_id': key,
'ext': 'mp4',
'url': tpl.replace('{}', film_fn) + suffix[key],
} for key, tpl in templates.items()]
query = traverse_obj(next_js_data, (
'props', '__dehydratedState', 'queries', ..., 'state',
'data', 'data', lambda _, v: v['__typename'] == 'CmsMovie', any))
if 'DRM' in traverse_obj(query, ('licenses', 'nodes', ..., 'properties', {str})):
self.report_drm(video_id)
return {
'id': video_id,
'formats': formats,
'title': info['title'],
'age_limit': int_or_none(custom_fields.get('FSK')[0]),
'timestamp': parse_iso8601(info.get('date'), delimiter=' '),
'description': clean_html(info.get('content')),
'thumbnail': info.get('thumbnail'),
**traverse_obj(query, {
'title': ('originalTitle', {clean_html}),
'age_limit': ('fskRating', {int_or_none}),
'alt_title': ('originalTitle', {clean_html}, filter),
'cast': ('cast', 'nodes', ..., 'person', 'name', {clean_html}, filter),
'creators': (('directors', 'writers'), 'nodes', ..., 'person', 'name', {clean_html}, filter),
'categories': ('categories', 'nodes', ..., 'category', 'title', {clean_html}, filter),
'description': ('longSynopsis', {clean_html}, filter),
'duration': ('runtimeInSeconds', {int_or_none}),
'location': ('productionCountry', {clean_html}, filter),
'release_year': ('productionYear', {int_or_none}),
'thumbnail': ('coverImage', 'masterUrl', {url_or_none}),
'url': ('videoSource', 'pmdUrl', {urljoin('https://pmd.netzkino-seite.netzkino.de/')}),
}),
}
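# The query lookup above implies a dehydrated next.js/react-query cache
# shaped roughly like (hypothetical sketch):
#   {"props": {"__dehydratedState": {"queries": [{"state": {"data":
#     {"data": [{"__typename": "CmsMovie", ...}]}}}]}}}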


@@ -1,238 +0,0 @@
import urllib.parse
from .common import InfoExtractor
from ..utils import (
clean_html,
get_element_by_class,
int_or_none,
parse_iso8601,
remove_start,
unified_timestamp,
)
class NextMediaIE(InfoExtractor):
IE_DESC = '蘋果日報'
_VALID_URL = r'https?://hk\.apple\.nextmedia\.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)'
_TESTS = [{
'url': 'http://hk.apple.nextmedia.com/realtime/news/20141108/53109199',
'md5': 'dff9fad7009311c421176d1ac90bfe4f',
'info_dict': {
'id': '53109199',
'ext': 'mp4',
'title': '【佔領金鐘】50外國領事議員撐場 讚學生勇敢香港有希望',
'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:28222b9912b6665a21011b034c70fcc7',
'timestamp': 1415456273,
'upload_date': '20141108',
},
}]
_URL_PATTERN = r'\{ url: \'(.+)\' \}'
def _real_extract(self, url):
news_id = self._match_id(url)
page = self._download_webpage(url, news_id)
return self._extract_from_nextmedia_page(news_id, url, page)
def _extract_from_nextmedia_page(self, news_id, url, page):
redirection_url = self._search_regex(
r'window\.location\.href\s*=\s*([\'"])(?P<url>(?!\1).+)\1',
page, 'redirection URL', default=None, group='url')
if redirection_url:
return self.url_result(urllib.parse.urljoin(url, redirection_url))
title = self._fetch_title(page)
video_url = self._search_regex(self._URL_PATTERN, page, 'video url')
attrs = {
'id': news_id,
'title': title,
'url': video_url, # ext can be inferred from url
'thumbnail': self._fetch_thumbnail(page),
'description': self._fetch_description(page),
}
timestamp = self._fetch_timestamp(page)
if timestamp:
attrs['timestamp'] = timestamp
else:
attrs['upload_date'] = self._fetch_upload_date(url)
return attrs
def _fetch_title(self, page):
return self._og_search_title(page)
def _fetch_thumbnail(self, page):
return self._og_search_thumbnail(page)
def _fetch_timestamp(self, page):
date_created = self._search_regex('"dateCreated":"([^"]+)"', page, 'created time')
return parse_iso8601(date_created)
def _fetch_upload_date(self, url):
return self._search_regex(self._VALID_URL, url, 'upload date', group='date')
def _fetch_description(self, page):
return self._og_search_property('description', page)
class NextMediaActionNewsIE(NextMediaIE): # XXX: Do not subclass from concrete IE
IE_DESC = '蘋果日報 - 動新聞'
_VALID_URL = r'https?://hk\.dv\.nextmedia\.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+'
_TESTS = [{
'url': 'http://hk.dv.nextmedia.com/actionnews/hit/20150121/19009428/20061460',
'md5': '05fce8ffeed7a5e00665d4b7cf0f9201',
'info_dict': {
'id': '19009428',
'ext': 'mp4',
'title': '【壹週刊】細10年男友偷食 50歲邵美琪再失戀',
'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:cd802fad1f40fd9ea178c1e2af02d659',
'timestamp': 1421791200,
'upload_date': '20150120',
},
}]
def _real_extract(self, url):
news_id = self._match_id(url)
actionnews_page = self._download_webpage(url, news_id)
article_url = self._og_search_url(actionnews_page)
article_page = self._download_webpage(article_url, news_id)
return self._extract_from_nextmedia_page(news_id, url, article_page)
class AppleDailyIE(NextMediaIE): # XXX: Do not subclass from concrete IE
IE_DESC = '臺灣蘋果日報'
_VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/[^/]+/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
_TESTS = [{
'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694',
'md5': 'a843ab23d150977cc55ef94f1e2c1e4d',
'info_dict': {
'id': '36354694',
'ext': 'mp4',
'title': '周亭羽走過摩鐵陰霾2男陪吃 九把刀孤寒看醫生',
'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:2acd430e59956dc47cd7f67cb3c003f4',
'upload_date': '20150128',
},
}, {
'url': 'http://www.appledaily.com.tw/realtimenews/article/strange/20150128/550549/%E4%B8%8D%E6%BB%BF%E8%A2%AB%E8%B8%A9%E8%85%B3%E3%80%80%E5%B1%B1%E6%9D%B1%E5%85%A9%E5%A4%A7%E5%AA%BD%E4%B8%80%E8%B7%AF%E6%89%93%E4%B8%8B%E8%BB%8A',
'md5': '86b4e9132d158279c7883822d94ccc49',
'info_dict': {
'id': '550549',
'ext': 'mp4',
'title': '不滿被踩腳 山東兩大媽一路打下車',
'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:175b4260c1d7c085993474217e4ab1b4',
'upload_date': '20150128',
},
}, {
'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003671',
'md5': '03df296d95dedc2d5886debbb80cb43f',
'info_dict': {
'id': '5003671',
'ext': 'mp4',
'title': '20正妹熱舞 《刀龍傳說Online》火辣上市',
'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:23c0aac567dc08c9c16a3161a2c2e3cd',
'upload_date': '20150128',
},
'skip': 'redirect to http://www.appledaily.com.tw/animation/',
}, {
# No thumbnail
'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003673/',
'md5': 'b06182cd386ea7bc6115ec7ff0f72aeb',
'info_dict': {
'id': '5003673',
'ext': 'mp4',
'title': '半夜尿尿 好像會看到___',
'description': 'md5:61d2da7fe117fede148706cdb85ac066',
'upload_date': '20150128',
},
'expected_warnings': [
'video thumbnail',
],
'skip': 'redirect to http://www.appledaily.com.tw/animation/',
}, {
'url': 'http://www.appledaily.com.tw/appledaily/article/supplement/20140417/35770334/',
'md5': 'eaa20e6b9df418c912d7f5dec2ba734d',
'info_dict': {
'id': '35770334',
'ext': 'mp4',
'title': '咖啡占卜測 XU裝熟指數',
'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:7b859991a6a4fedbdf3dd3b66545c748',
'upload_date': '20140417',
},
}, {
'url': 'http://www.appledaily.com.tw/actionnews/appledaily/7/20161003/960588/',
'only_matching': True,
}, {
# Redirected from http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694
'url': 'http://ent.appledaily.com.tw/section/article/headline/20150128/36354694',
'only_matching': True,
}]
_URL_PATTERN = r'\{url: \'(.+)\'\}'
def _fetch_title(self, page):
return (self._html_search_regex(r'<h1 id="h1">([^<>]+)</h1>', page, 'news title', default=None)
or self._html_search_meta('description', page, 'news title'))
def _fetch_thumbnail(self, page):
return self._html_search_regex(r"setInitialImage\(\'([^']+)'\)", page, 'video thumbnail', fatal=False)
def _fetch_timestamp(self, page):
return None
def _fetch_description(self, page):
return self._html_search_meta('description', page, 'news description')
class NextTVIE(InfoExtractor):
_WORKING = False
_ENABLED = None # XXX: pass through to GenericIE
IE_DESC = '壹電視'
_VALID_URL = r'https?://(?:www\.)?nexttv\.com\.tw/(?:[^/]+/)+(?P<id>\d+)'
_TEST = {
'url': 'http://www.nexttv.com.tw/news/realtime/politics/11779671',
'info_dict': {
'id': '11779671',
'ext': 'mp4',
'title': '「超收稅」近4千億 藍議員籲發消費券',
'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1484825400,
'upload_date': '20170119',
'view_count': int,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(
r'<h1[^>]*>([^<]+)</h1>', webpage, 'title')
data = self._hidden_inputs(webpage)
video_url = data['ntt-vod-src-detailview']
date_str = get_element_by_class('date', webpage)
timestamp = unified_timestamp(date_str + '+0800') if date_str else None
view_count = int_or_none(remove_start(
clean_html(get_element_by_class('click', webpage)), '點閱:'))
return {
'id': video_id,
'title': title,
'url': video_url,
'thumbnail': data.get('ntt-vod-img-src'),
'timestamp': timestamp,
'view_count': view_count,
}


@@ -0,0 +1,83 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
UserNotLive,
filter_dict,
int_or_none,
join_nonempty,
parse_iso8601,
url_or_none,
urlencode_postdata,
)
from ..utils.traversal import traverse_obj
class PandaTvIE(InfoExtractor):
IE_DESC = 'pandalive.co.kr (팬더티비)'
_VALID_URL = r'https?://(?:www\.|m\.)?pandalive\.co\.kr/play/(?P<id>\w+)'
_TESTS = [{
'url': 'https://www.pandalive.co.kr/play/bebenim',
'info_dict': {
'id': 'bebenim',
'ext': 'mp4',
'channel': '릴리ෆ',
'title': r're:앙앙❤ \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
'thumbnail': r're:https://cdn\.pandalive\.co\.kr/ivs/v1/.+/thumb\.jpg',
'concurrent_view_count': int,
'like_count': int,
'live_status': 'is_live',
'upload_date': str,
},
'skip': 'The channel is not currently live',
}]
def _real_extract(self, url):
channel_id = self._match_id(url)
video_meta = self._download_json(
'https://api.pandalive.co.kr/v1/live/play', channel_id,
'Downloading video meta data', 'Unable to download video meta data',
data=urlencode_postdata(filter_dict({
'action': 'watch',
'userId': channel_id,
'password': self.get_param('videopassword'),
})), expected_status=400)
if error_code := traverse_obj(video_meta, ('errorData', 'code', {str})):
if error_code == 'castEnd':
raise UserNotLive(video_id=channel_id)
elif error_code == 'needAdult':
self.raise_login_required('Adult verification is required for this stream')
elif error_code == 'needLogin':
self.raise_login_required('Login is required for this stream')
elif error_code == 'needCoinPurchase':
raise ExtractorError('Coin purchase is required for this stream', expected=True)
elif error_code == 'needUnlimitItem':
raise ExtractorError('Ticket purchase is required for this stream', expected=True)
elif error_code == 'needPw':
raise ExtractorError('Password protected video, use --video-password <password>', expected=True)
elif error_code == 'wrongPw':
raise ExtractorError('Wrong password', expected=True)
else:
error_msg = video_meta.get('message')
raise ExtractorError(join_nonempty(
'API returned error code', error_code,
error_msg and 'with error message:', error_msg,
delim=' '))
http_headers = {'Origin': 'https://www.pandalive.co.kr'}
return {
'id': channel_id,
'is_live': True,
'formats': self._extract_m3u8_formats(
video_meta['PlayList']['hls'][0]['url'], channel_id, 'mp4', headers=http_headers, live=True),
'http_headers': http_headers,
**traverse_obj(video_meta, ('media', {
'title': ('title', {str}),
'release_timestamp': ('startTime', {parse_iso8601(delim=' ')}),
'thumbnail': ('ivsThumbnail', {url_or_none}),
'channel': ('userNick', {str}),
'concurrent_view_count': ('user', {int_or_none}),
'like_count': ('likeCnt', {int_or_none}),
})),
}
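# Hypothetical shapes implied by the handling above (not captured from
# the API): errors arrive with HTTP 400 as
#   {"errorData": {"code": "needPw"}, "message": "..."}
# while a live stream carries
#   {"PlayList": {"hls": [{"url": ...}]}, "media": {"title": ..., ...}}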


@@ -6,7 +6,10 @@ from ..utils.traversal import traverse_obj
class PartiBaseIE(InfoExtractor):
def _call_api(self, path, video_id, note=None):
return self._download_json(
f'https://api-backend.parti.com/parti_v2/profile/{path}', video_id, note)
f'https://prod-api.parti.com/parti_v2/profile/{path}', video_id, note, headers={
'Origin': 'https://parti.com',
'Referer': 'https://parti.com/',
})
class PartiVideoIE(PartiBaseIE):
@@ -20,7 +23,7 @@ class PartiVideoIE(PartiBaseIE):
'title': 'NOW LIVE ',
'upload_date': '20250327',
'categories': ['Gaming'],
'thumbnail': 'https://assets.parti.com/351424_eb9e5250-2821-484a-9c5f-ca99aa666c87.png',
'thumbnail': 'https://media.parti.com/351424_eb9e5250-2821-484a-9c5f-ca99aa666c87.png',
'channel': 'ItZTMGG',
'timestamp': 1743044379,
},
@@ -34,7 +37,7 @@ class PartiVideoIE(PartiBaseIE):
return {
'id': video_id,
'formats': self._extract_m3u8_formats(
urljoin('https://watch.parti.com', data['livestream_recording']), video_id, 'mp4'),
urljoin('https://media.parti.com/', data['livestream_recording']), video_id, 'mp4'),
**traverse_obj(data, {
'title': ('event_title', {str}),
'channel': ('user_name', {str}),
@@ -47,32 +50,27 @@ class PartiVideoIE(PartiBaseIE):
class PartiLivestreamIE(PartiBaseIE):
IE_NAME = 'parti:livestream'
_VALID_URL = r'https?://(?:www\.)?parti\.com/creator/(?P<service>[\w]+)/(?P<id>[\w/-]+)'
_VALID_URL = r'https?://(?:www\.)?parti\.com/(?!video/)(?P<id>[\w/-]+)'
_TESTS = [{
'url': 'https://parti.com/creator/parti/Capt_Robs_Adventures',
'url': 'https://parti.com/247CryptoTracker',
'info_dict': {
'id': 'Capt_Robs_Adventures',
'ext': 'mp4',
'id': '247CryptoTracker',
'description': 'md5:a78051f3d7e66e6a64c6b1eaf59fd364',
'title': r"re:I'm Live on Parti \d{4}-\d{2}-\d{2} \d{2}:\d{2}",
'view_count': int,
'thumbnail': r're:https://assets\.parti\.com/.+\.png',
'timestamp': 1743879776,
'upload_date': '20250405',
'thumbnail': r're:https://media\.parti\.com/stream-screenshots/.+\.png',
'live_status': 'is_live',
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://parti.com/creator/discord/sazboxgaming/0',
'only_matching': True,
}]
def _real_extract(self, url):
service, creator_slug = self._match_valid_url(url).group('service', 'id')
creator_slug = self._match_id(url)
encoded_creator_slug = creator_slug.replace('/', '%23')
creator_id = self._call_api(
f'get_user_by_social_media/{service}/{encoded_creator_slug}',
creator_slug, note='Fetching user ID')
f'user_id_from_name/{encoded_creator_slug}',
creator_slug, note='Fetching user ID')['user_id']
data = self._call_api(
f'get_livestream_channel_info/{creator_id}', creator_id,
@@ -85,11 +83,7 @@ class PartiLivestreamIE(PartiBaseIE):
return {
'id': creator_slug,
'formats': self._extract_m3u8_formats(
channel_info['playback_url'], creator_slug, live=True, query={
'token': channel_info['playback_auth_token'],
'player_version': '1.17.0',
}),
'formats': self._extract_m3u8_formats(channel_info['playback_url'], creator_slug, live=True),
'is_live': True,
**traverse_obj(data, {
'title': ('livestream_event_info', 'event_name', {str}),


@@ -4,6 +4,7 @@ from .common import InfoExtractor
from ..utils import (
ExtractorError,
str_or_none,
strip_or_none,
traverse_obj,
update_url,
)
@@ -50,7 +51,6 @@ class PicartoIE(InfoExtractor):
if metadata.get('online') == 0:
raise ExtractorError('Stream is offline', expected=True)
title = metadata['title']
cdn_data = self._download_json(''.join((
update_url(data['getLoadBalancerUrl']['url'], scheme='https'),
@@ -79,7 +79,7 @@ class PicartoIE(InfoExtractor):
return {
'id': channel_id,
'title': title.strip(),
'title': strip_or_none(metadata.get('title')),
'is_live': True,
'channel': channel_id,
'channel_id': metadata.get('id'),
@@ -159,7 +159,7 @@ class PicartoVodIE(InfoExtractor):
'id': video_id,
**traverse_obj(data, {
'id': ('id', {str_or_none}),
'title': ('title', {str}),
'title': ('title', {str.strip}),
'thumbnail': 'video_recording_image_url',
'channel': ('channel', 'name', {str}),
'age_limit': ('adult', {lambda x: 18 if x else 0}),


@@ -24,6 +24,7 @@ from ..utils import (
url_or_none,
urlencode_postdata,
)
from ..utils.traversal import find_elements, traverse_obj
class PornHubBaseIE(InfoExtractor):
@@ -137,23 +138,24 @@ class PornHubIE(PornHubBaseIE):
_EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)']
_TESTS = [{
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
'md5': 'a6391306d050e4547f62b3f485dd9ba9',
'md5': '4d4a4e9178b655776f86cf89ecaf0edf',
'info_dict': {
'id': '648719015',
'ext': 'mp4',
'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
'uploader': 'Babes',
'uploader': 'BABES-COM',
'uploader_id': '/users/babes-com',
'upload_date': '20130628',
'timestamp': 1372447216,
'duration': 361,
'view_count': int,
'like_count': int,
'dislike_count': int,
'comment_count': int,
'age_limit': 18,
'tags': list,
'categories': list,
'cast': list,
'thumbnail': r're:https?://.+',
},
}, {
# non-ASCII title
@@ -480,13 +482,6 @@ class PornHubIE(PornHubBaseIE):
comment_count = self._extract_count(
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
def extract_list(meta_key):
div = self._search_regex(
rf'(?s)<div[^>]+\bclass=["\'].*?\b{meta_key}Wrapper[^>]*>(.+?)</div>',
webpage, meta_key, default=None)
if div:
return [clean_html(x).strip() for x in re.findall(r'(?s)<a[^>]+\bhref=[^>]+>.+?</a>', div)]
info = self._search_json_ld(webpage, video_id, default={})
# description provided in JSON-LD is irrelevant
info['description'] = None
@@ -505,9 +500,11 @@ class PornHubIE(PornHubBaseIE):
'comment_count': comment_count,
'formats': formats,
'age_limit': 18,
'tags': extract_list('tags'),
'categories': extract_list('categories'),
'cast': extract_list('pornstars'),
**traverse_obj(webpage, {
'tags': ({find_elements(attr='data-label', value='tag')}, ..., {clean_html}),
'categories': ({find_elements(attr='data-label', value='category')}, ..., {clean_html}),
'cast': ({find_elements(attr='data-label', value='pornstar')}, ..., {clean_html}),
}),
'subtitles': subtitles,
}, info)
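# The find_elements traversal assumes markup along the lines of
#   <a data-label="tag" ...>Tag Name</a>
# (hypothetical sketch); one attribute query per label replaces the old
# per-wrapper regex scraping.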


@@ -1,137 +0,0 @@
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
decode_packed_codes,
urlencode_postdata,
)
class SCTEBaseIE(InfoExtractor):
_LOGIN_URL = 'https://www.scte.org/SCTE/Sign_In.aspx'
_NETRC_MACHINE = 'scte'
def _perform_login(self, username, password):
login_popup = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login popup')
def is_logged(webpage):
return any(re.search(p, webpage) for p in (
r'class=["\']welcome\b', r'>Sign Out<'))
# already logged in
if is_logged(login_popup):
return
login_form = self._hidden_inputs(login_popup)
login_form.update({
'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInUserName': username,
'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInPassword': password,
'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$RememberMe': 'on',
})
response = self._download_webpage(
self._LOGIN_URL, None, 'Logging in',
data=urlencode_postdata(login_form))
if '|pageRedirect|' not in response and not is_logged(response):
error = self._html_search_regex(
r'(?s)<[^>]+class=["\']AsiError["\'][^>]*>(.+?)</',
response, 'error message', default=None)
if error:
raise ExtractorError(f'Unable to login: {error}', expected=True)
raise ExtractorError('Unable to log in')
class SCTEIE(SCTEBaseIE):
_WORKING = False
_VALID_URL = r'https?://learning\.scte\.org/mod/scorm/view\.php?.*?\bid=(?P<id>\d+)'
_TESTS = [{
'url': 'https://learning.scte.org/mod/scorm/view.php?id=31484',
'info_dict': {
'title': 'Introduction to DOCSIS Engineering Professional',
'id': '31484',
},
'playlist_count': 5,
'skip': 'Requires account credentials',
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
context_id = self._search_regex(r'context-(\d+)', webpage, video_id)
content_base = f'https://learning.scte.org/pluginfile.php/{context_id}/mod_scorm/content/8/'
context = decode_packed_codes(self._download_webpage(
f'{content_base}mobile/data.js', video_id))
data = self._parse_xml(
self._search_regex(
r'CreateData\(\s*"(.+?)"', context, 'data').replace(r"\'", "'"),
video_id)
entries = []
for asset in data.findall('.//asset'):
asset_url = asset.get('url')
if not asset_url or not asset_url.endswith('.mp4'):
continue
asset_id = self._search_regex(
r'video_([^_]+)_', asset_url, 'asset id', default=None)
if not asset_id:
continue
entries.append({
'id': asset_id,
'title': title,
'url': content_base + asset_url,
})
return self.playlist_result(entries, video_id, title)
class SCTECourseIE(SCTEBaseIE):
_WORKING = False
_VALID_URL = r'https?://learning\.scte\.org/(?:mod/sub)?course/view\.php?.*?\bid=(?P<id>\d+)'
_TESTS = [{
'url': 'https://learning.scte.org/mod/subcourse/view.php?id=31491',
'only_matching': True,
}, {
'url': 'https://learning.scte.org/course/view.php?id=3639',
'only_matching': True,
}, {
'url': 'https://learning.scte.org/course/view.php?id=3073',
'only_matching': True,
}]
def _real_extract(self, url):
course_id = self._match_id(url)
webpage = self._download_webpage(url, course_id)
title = self._search_regex(
r'<h1>(.+?)</h1>', webpage, 'title', default=None)
entries = []
for mobj in re.finditer(
r'''(?x)
<a[^>]+
href=(["\'])
(?P<url>
https?://learning\.scte\.org/mod/
(?P<kind>scorm|subcourse)/view\.php?(?:(?!\1).)*?
\bid=\d+
)
''',
webpage):
item_url = mobj.group('url')
if item_url == url:
continue
ie = (SCTEIE.ie_key() if mobj.group('kind') == 'scorm'
else SCTECourseIE.ie_key())
entries.append(self.url_result(item_url, ie=ie))
return self.playlist_result(entries, course_id, title)


@@ -0,0 +1,243 @@
import base64
import binascii
import functools
import re
import urllib.parse
from .common import InfoExtractor
from ..dependencies import Cryptodome
from ..utils import (
ExtractorError,
OnDemandPagedList,
clean_html,
extract_attributes,
urljoin,
)
from ..utils.traversal import (
find_element,
find_elements,
require,
traverse_obj,
)
class TarangPlusBaseIE(InfoExtractor):
_BASE_URL = 'https://tarangplus.in'
class TarangPlusVideoIE(TarangPlusBaseIE):
IE_NAME = 'tarangplus:video'
_VALID_URL = r'https?://(?:www\.)?tarangplus\.in/(?:movies|[^#?/]+/[^#?/]+)/(?!episodes)(?P<id>[^#?/]+)'
_TESTS = [{
'url': 'https://tarangplus.in/tarangaplus-originals/khitpit/khitpit-ep-10',
'md5': '78ce056cee755687b8a48199909ecf53',
'info_dict': {
'id': '67b8206719521d054c0059b7',
'display_id': 'khitpit-ep-10',
'ext': 'mp4',
'title': 'Khitpit Ep-10',
'description': 'md5:a45b805cb628e15c853d78b0406eab48',
'thumbnail': r're:https?://.+/.+\.jpg',
'duration': 756.0,
'timestamp': 1740355200,
'upload_date': '20250224',
'media_type': 'episode',
'categories': ['Originals'],
},
}, {
'url': 'https://tarangplus.in/tarang-serials/bada-bohu/bada-bohu-ep-233',
'md5': 'b4f9beb15172559bb362203b4f48382e',
'info_dict': {
'id': '680b9d6c19521d054c007782',
'display_id': 'bada-bohu-ep-233',
'ext': 'mp4',
'title': 'Bada Bohu | Ep -233',
'description': 'md5:e6b8e7edc9e60b92c1b390f8789ecd69',
'thumbnail': r're:https?://.+/.+\.jpg',
'duration': 1392.0,
'timestamp': 1745539200,
'upload_date': '20250425',
'media_type': 'episode',
'categories': ['Prime'],
},
}, {
# Decrypted m3u8 URL has trailing control characters that need to be stripped
'url': 'https://tarangplus.in/tarangaplus-originals/ichha/ichha-teaser-1',
'md5': '16ee43fe21ad8b6e652ec65eba38a64e',
'info_dict': {
'id': '5f0f252d3326af0720000342',
'ext': 'mp4',
'display_id': 'ichha-teaser-1',
'title': 'Ichha Teaser',
'description': 'md5:c724b0b0669a2cefdada3711cec792e6',
'media_type': 'episode',
'duration': 21.0,
'thumbnail': r're:https?://.+/.+\.jpg',
'categories': ['Originals'],
'timestamp': 1758153600,
'upload_date': '20250918',
},
}, {
'url': 'https://tarangplus.in/short/ai-maa/ai-maa',
'only_matching': True,
}, {
'url': 'https://tarangplus.in/shows/tarang-cine-utsav-2024/tarang-cine-utsav-2024-seg-1',
'only_matching': True,
}, {
'url': 'https://tarangplus.in/music-videos/chori-chori-bohu-chori-songs/nijara-laguchu-dhire-dhire',
'only_matching': True,
}, {
'url': 'https://tarangplus.in/kids-shows/chhota-jaga/chhota-jaga-ep-33-jamidar-ra-khajana-adaya',
'only_matching': True,
}, {
'url': 'https://tarangplus.in/movies/swayambara',
'only_matching': True,
}]
def decrypt(self, data, key):
if not Cryptodome.AES:
raise ExtractorError('pycryptodomex not found. Please install', expected=True)
iv = binascii.unhexlify('00000000000000000000000000000000')
cipher = Cryptodome.AES.new(base64.b64decode(key), Cryptodome.AES.MODE_CBC, iv)
return cipher.decrypt(base64.b64decode(data)).decode('utf-8')
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
hidden_inputs_data = self._hidden_inputs(webpage)
json_ld_data = self._search_json_ld(webpage, display_id)
json_ld_data.pop('url', None)
iframe_url = traverse_obj(webpage, (
{find_element(tag='iframe', attr='src', value=r'.+[?&]contenturl=.+', html=True, regex=True)},
{extract_attributes}, 'src', {require('iframe URL')}))
# Can't use parse_qs here since it would decode the encrypted base64 `+` chars to spaces
content = self._search_regex(r'[?&]contenturl=(.+)', iframe_url, 'content')
encrypted_data, _, attrs = content.partition('|')
metadata = {
m.group('k'): m.group('v')
for m in re.finditer(r'(?:^|\|)(?P<k>[a-z_]+)=(?P<v>(?:(?!\|[a-z_]+=).)+)', attrs)
}
m3u8_url = urllib.parse.unquote(
self.decrypt(encrypted_data, metadata['key'])).rstrip('\x0e\x0f')
return {
'id': display_id, # Fallback
'display_id': display_id,
**json_ld_data,
**traverse_obj(metadata, {
'id': ('content_id', {str}),
'title': ('title', {str}),
'thumbnail': ('image', {str}),
}),
**traverse_obj(hidden_inputs_data, {
'id': ('content_id', {str}),
'media_type': ('theme_type', {str}),
'categories': ('genre', {str}, filter, all, filter),
}),
'formats': self._extract_m3u8_formats(m3u8_url, display_id),
}
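
Side note on the scheme implemented by decrypt() above — AES-CBC with an all-zero IV over a base64 payload — which also explains the parse_qs comment in _real_extract(). A minimal standalone sketch, assuming hypothetical key/payload values (pycryptodomex provides the Cryptodome namespace):

import base64
import urllib.parse

from Cryptodome.Cipher import AES  # pycryptodomex

def decrypt_content_url(encrypted_b64, key_b64):
    cipher = AES.new(base64.b64decode(key_b64), AES.MODE_CBC, iv=bytes(16))  # all-zero IV
    plaintext = cipher.decrypt(base64.b64decode(encrypted_b64)).decode('utf-8')
    # Trailing \x0e/\x0f bytes are block-cipher padding left over after decryption
    return urllib.parse.unquote(plaintext).rstrip('\x0e\x0f')

# parse_qs would have corrupted the base64 ciphertext before it ever got here:
print(urllib.parse.parse_qs('contenturl=AAA+BBB=')['contenturl'])  # ['AAA BBB='] — '+' decoded to space
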
class TarangPlusEpisodesIE(TarangPlusBaseIE):
IE_NAME = 'tarangplus:episodes'
_VALID_URL = r'https?://(?:www\.)?tarangplus\.in/(?P<type>[^#?/]+)/(?P<id>[^#?/]+)/episodes/?(?:$|[?#])'
_TESTS = [{
'url': 'https://tarangplus.in/tarangaplus-originals/balijatra/episodes',
'info_dict': {
'id': 'balijatra',
'title': 'Balijatra',
},
'playlist_mincount': 7,
}, {
'url': 'https://tarangplus.in/tarang-serials/bada-bohu/episodes',
'info_dict': {
'id': 'bada-bohu',
'title': 'Bada Bohu',
},
'playlist_mincount': 236,
}, {
'url': 'https://tarangplus.in/shows/dr-nonsense/episodes',
'info_dict': {
'id': 'dr-nonsense',
'title': 'Dr. Nonsense',
},
'playlist_mincount': 15,
}]
_PAGE_SIZE = 20
def _entries(self, playlist_url, playlist_id, page):
data = self._download_json(
playlist_url, playlist_id, f'Downloading playlist JSON page {page + 1}',
query={'page_no': page})
for item in traverse_obj(data, ('items', ..., {str})):
yield self.url_result(
urljoin(self._BASE_URL, item.split('$')[3]), TarangPlusVideoIE)
def _real_extract(self, url):
url_type, display_id = self._match_valid_url(url).group('type', 'id')
series_url = f'{self._BASE_URL}/{url_type}/{display_id}'
webpage = self._download_webpage(series_url, display_id)
entries = OnDemandPagedList(
functools.partial(self._entries, f'{series_url}/episodes', display_id),
self._PAGE_SIZE)
return self.playlist_result(
entries, display_id, self._hidden_inputs(webpage).get('title'))
class TarangPlusPlaylistIE(TarangPlusBaseIE):
IE_NAME = 'tarangplus:playlist'
_VALID_URL = r'https?://(?:www\.)?tarangplus\.in/(?P<id>[^#?/]+)/all/?(?:$|[?#])'
_TESTS = [{
'url': 'https://tarangplus.in/chhota-jaga/all',
'info_dict': {
'id': 'chhota-jaga',
'title': 'Chhota Jaga',
},
'playlist_mincount': 33,
}, {
'url': 'https://tarangplus.in/kids-yali-show/all',
'info_dict': {
'id': 'kids-yali-show',
'title': 'Yali',
},
'playlist_mincount': 10,
}, {
'url': 'https://tarangplus.in/trailer/all',
'info_dict': {
'id': 'trailer',
'title': 'Trailer',
},
'playlist_mincount': 57,
}, {
'url': 'https://tarangplus.in/latest-songs/all',
'info_dict': {
'id': 'latest-songs',
'title': 'Latest Songs',
},
'playlist_mincount': 46,
}, {
'url': 'https://tarangplus.in/premium-serials-episodes/all',
'info_dict': {
'id': 'premium-serials-episodes',
'title': 'Primetime Latest Episodes',
},
'playlist_mincount': 100,
}]
def _entries(self, webpage):
for url_path in traverse_obj(webpage, (
{find_elements(cls='item')}, ...,
{find_elements(tag='a', attr='href', value='/.+', html=True, regex=True)},
..., {extract_attributes}, 'href',
)):
yield self.url_result(urljoin(self._BASE_URL, url_path), TarangPlusVideoIE)
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
return self.playlist_result(
self._entries(webpage), display_id,
traverse_obj(webpage, ({find_element(id='al_title')}, {clean_html})))

View File

@@ -6,20 +6,21 @@ from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
clean_html,
extract_attributes,
int_or_none,
join_nonempty,
str_or_none,
traverse_obj,
update_url,
url_or_none,
)
from ..utils.traversal import traverse_obj
class TelecincoBaseIE(InfoExtractor):
def _parse_content(self, content, url):
video_id = content['dataMediaId']
video_id = content['dataMediaId'][1]
config = self._download_json(
content['dataConfig'], video_id, 'Downloading config JSON')
content['dataConfig'][1], video_id, 'Downloading config JSON')
services = config['services']
caronte = self._download_json(services['caronte'], video_id)
if traverse_obj(caronte, ('dls', 0, 'drm', {bool})):
@@ -57,9 +58,9 @@ class TelecincoBaseIE(InfoExtractor):
'id': video_id,
'title': traverse_obj(config, ('info', 'title', {str})),
'formats': formats,
'thumbnail': (traverse_obj(content, ('dataPoster', {url_or_none}))
'thumbnail': (traverse_obj(content, ('dataPoster', 1, {url_or_none}))
or traverse_obj(config, 'poster', 'imageUrl', expected_type=url_or_none)),
'duration': traverse_obj(content, ('dataDuration', {int_or_none})),
'duration': traverse_obj(content, ('dataDuration', 1, {int_or_none})),
'http_headers': headers,
}
@@ -137,30 +138,45 @@ class TelecincoIE(TelecincoBaseIE):
'url': 'http://www.cuatro.com/chesterinlove/a-carta/chester-chester_in_love-chester_edu_2_2331030022.html',
'only_matching': True,
}]
_ASTRO_ISLAND_RE = re.compile(r'<astro-island\b[^>]+>')
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id, impersonate=True)
article = self._search_json(
r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=',
webpage, 'article', display_id)['article']
description = traverse_obj(article, ('leadParagraph', {clean_html}, filter))
if article.get('editorialType') != 'VID':
props_list = traverse_obj(webpage, (
{self._ASTRO_ISLAND_RE.findall}, ...,
{extract_attributes}, 'props', {json.loads}))
description = traverse_obj(props_list, (..., 'leadParagraph', 1, {clean_html}, any, filter))
main_content = traverse_obj(props_list, (..., ('content', ('articleData', 1, 'opening')), 1, {dict}, any))
if traverse_obj(props_list, (..., 'editorialType', 1, {str}, any)) != 'VID': # e.g. 'ART'
entries = []
for p in traverse_obj(article, ((('opening', all), 'body'), lambda _, v: v['content'])):
content = p['content']
type_ = p.get('type')
if type_ == 'paragraph' and isinstance(content, str):
for p in traverse_obj(props_list, (..., 'articleData', 1, ('opening', ('body', 1, ...)), 1, {dict})):
type_ = traverse_obj(p, ('type', 1, {str}))
content = traverse_obj(p, ('content', 1, {str} if type_ == 'paragraph' else {dict}))
if not content:
continue
if type_ == 'paragraph':
description = join_nonempty(description, content, delim='')
elif type_ == 'video' and isinstance(content, dict):
elif type_ == 'video':
entries.append(self._parse_content(content, url))
else:
self.report_warning(
f'Skipping unsupported content type "{type_}"', display_id, only_once=True)
return self.playlist_result(
entries, str_or_none(article.get('id')),
traverse_obj(article, ('title', {str})), clean_html(description))
entries,
traverse_obj(props_list, (..., 'id', 1, {int}, {str_or_none}, any)) or display_id,
traverse_obj(main_content, ('dataTitle', 1, {str})),
clean_html(description))
info = self._parse_content(article['opening']['content'], url)
if not main_content:
raise ExtractorError('Unable to extract main content from webpage')
info = self._parse_content(main_content, url)
info['description'] = description
return info
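
For context, a rough sketch of the <astro-island> parsing the rewritten extractor performs. The sample markup is hypothetical, but mirrors how Astro serializes each prop as a [type, value] pair — hence the `1` indices in the traversal paths above:

import json
import re

from yt_dlp.utils import extract_attributes

# extract_attributes unescapes the HTML-entity-encoded JSON in `props`
webpage = '<astro-island uid="x1" props="{&quot;editorialType&quot;:[0,&quot;VID&quot;]}"></astro-island>'
props_list = [
    json.loads(extract_attributes(tag)['props'])
    for tag in re.findall(r'<astro-island\b[^>]+>', webpage)
]
print(props_list[0]['editorialType'][1])  # 'VID' — value sits at index 1 of the [type, value] pair
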

View File

@@ -454,6 +454,7 @@ class TikTokBaseIE(InfoExtractor):
'like_count': 'digg_count',
'repost_count': 'share_count',
'comment_count': 'comment_count',
'save_count': 'collect_count',
}, expected_type=int_or_none),
**author_info,
'channel_url': format_field(author_info, 'channel_id', self._UPLOADER_URL_FORMAT, default=None),
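
As an aside, the stats mapping above relies on traverse_obj's dict-template form, where missing or None results are dropped from the output dict. A small sketch with hypothetical stats:

from yt_dlp.utils import int_or_none
from yt_dlp.utils.traversal import traverse_obj

stats = {'digg_count': '12', 'collect_count': 3, 'share_count': None}
print(traverse_obj(stats, {
    'like_count': 'digg_count',
    'repost_count': 'share_count',
    'save_count': 'collect_count',
}, expected_type=int_or_none))
# -> {'like_count': 12, 'save_count': 3}  (None/uncoercible values are discarded)
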
@@ -607,6 +608,7 @@ class TikTokBaseIE(InfoExtractor):
'like_count': 'diggCount',
'repost_count': 'shareCount',
'comment_count': 'commentCount',
'save_count': 'collectCount',
}), expected_type=int_or_none),
'thumbnails': [
{
@@ -646,6 +648,7 @@ class TikTokIE(TikTokBaseIE):
'like_count': int,
'repost_count': int,
'comment_count': int,
'save_count': int,
'artist': 'Ysrbeats',
'album': 'Lehanga',
'track': 'Lehanga',
@@ -675,6 +678,7 @@ class TikTokIE(TikTokBaseIE):
'like_count': int,
'repost_count': int,
'comment_count': int,
'save_count': int,
'artists': ['Evan Todd', 'Jessica Keenan Wynn', 'Alice Lee', 'Barrett Wilbert Weed', 'Jon Eidson'],
'track': 'Big Fun',
},
@@ -702,6 +706,7 @@ class TikTokIE(TikTokBaseIE):
'like_count': int,
'repost_count': int,
'comment_count': int,
'save_count': int,
},
}, {
# Sponsored video, only available with feed workaround
@@ -725,6 +730,7 @@ class TikTokIE(TikTokBaseIE):
'like_count': int,
'repost_count': int,
'comment_count': int,
'save_count': int,
},
'skip': 'This video is unavailable',
}, {
@@ -751,6 +757,7 @@ class TikTokIE(TikTokBaseIE):
'like_count': int,
'repost_count': int,
'comment_count': int,
'save_count': int,
},
}, {
# hydration JSON is sent in a <script> element
@@ -773,6 +780,7 @@ class TikTokIE(TikTokBaseIE):
'like_count': int,
'repost_count': int,
'comment_count': int,
'save_count': int,
},
'skip': 'This video is unavailable',
}, {
@@ -798,6 +806,7 @@ class TikTokIE(TikTokBaseIE):
'like_count': int,
'repost_count': int,
'comment_count': int,
'save_count': int,
'thumbnail': r're:^https://.+\.(?:webp|jpe?g)',
},
}, {
@@ -824,6 +833,7 @@ class TikTokIE(TikTokBaseIE):
'like_count': int,
'repost_count': int,
'comment_count': int,
'save_count': int,
'thumbnail': r're:^https://.+',
'thumbnails': 'count:3',
},
@@ -851,6 +861,7 @@ class TikTokIE(TikTokBaseIE):
'like_count': int,
'repost_count': int,
'comment_count': int,
'save_count': int,
'thumbnail': r're:^https://.+\.webp',
},
'skip': 'Unavailable via feed API, only audio available via web',
@@ -879,6 +890,7 @@ class TikTokIE(TikTokBaseIE):
'like_count': int,
'comment_count': int,
'repost_count': int,
'save_count': int,
'thumbnail': r're:^https://.+\.(?:webp|jpe?g)',
},
}, {
@@ -1288,6 +1300,7 @@ class DouyinIE(TikTokBaseIE):
'like_count': int,
'repost_count': int,
'comment_count': int,
'save_count': int,
'thumbnail': r're:https?://.+\.jpe?g',
},
}, {
@@ -1312,6 +1325,7 @@ class DouyinIE(TikTokBaseIE):
'like_count': int,
'repost_count': int,
'comment_count': int,
'save_count': int,
'thumbnail': r're:https?://.+\.jpe?g',
},
}, {
@@ -1336,6 +1350,7 @@ class DouyinIE(TikTokBaseIE):
'like_count': int,
'repost_count': int,
'comment_count': int,
'save_count': int,
'thumbnail': r're:https?://.+\.jpe?g',
},
}, {
@@ -1353,6 +1368,7 @@ class DouyinIE(TikTokBaseIE):
'like_count': int,
'repost_count': int,
'comment_count': int,
'save_count': int,
},
'skip': 'No longer available',
}, {
@@ -1377,6 +1393,7 @@ class DouyinIE(TikTokBaseIE):
'like_count': int,
'repost_count': int,
'comment_count': int,
'save_count': int,
'thumbnail': r're:https?://.+\.jpe?g',
},
}]
@@ -1437,6 +1454,7 @@ class TikTokVMIE(InfoExtractor):
'view_count': int,
'like_count': int,
'comment_count': int,
'save_count': int,
'thumbnail': r're:https://.+\.webp.*',
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAdZ_NcPPgMneaGrW0hN8O_J_bwLshwNNERRF5DxOw2HKIzk0kdlLrR8RkVl1ksrMO',
'duration': 29,

View File

@@ -15,7 +15,7 @@ from ..utils import (
class TubiTvIE(InfoExtractor):
IE_NAME = 'tubitv'
_VALID_URL = r'https?://(?:www\.)?tubitv\.com/(?P<type>video|movies|tv-shows)/(?P<id>\d+)'
_VALID_URL = r'https?://(?:www\.)?tubitv\.com/(?:[a-z]{2}-[a-z]{2}/)?(?P<type>video|movies|tv-shows)/(?P<id>\d+)'
_LOGIN_URL = 'http://tubitv.com/login'
_NETRC_MACHINE = 'tubitv'
_TESTS = [{
@@ -73,6 +73,9 @@ class TubiTvIE(InfoExtractor):
'release_year': 1979,
},
'skip': 'Content Unavailable',
}, {
'url': 'https://tubitv.com/es-mx/tv-shows/477363/s01-e03-jacob-dos-dos-y-la-tarjets-de-hockey-robada',
'only_matching': True,
}]
# DRM formats are included only to raise appropriate error

View File

@@ -20,6 +20,8 @@ class TumblrIE(InfoExtractor):
'id': '54196191430',
'ext': 'mp4',
'title': 'md5:dfac39636969fe6bf1caa2d50405f069',
'timestamp': 1372531260,
'upload_date': '20130629',
'description': 'md5:390ab77358960235b6937ab3b8528956',
'uploader_id': 'tatianamaslanydaily',
'uploader_url': 'https://tatianamaslanydaily.tumblr.com/',
@@ -39,6 +41,8 @@ class TumblrIE(InfoExtractor):
'ext': 'mp4',
'title': 'Mona\xa0“talking” in\xa0“english”',
'description': 'md5:082a3a621530cb786ad2b7592a6d9e2c',
'timestamp': 1597865276,
'upload_date': '20200819',
'uploader_id': 'maskofthedragon',
'uploader_url': 'https://maskofthedragon.tumblr.com/',
'thumbnail': r're:^https?://.*\.jpg',
@@ -76,6 +80,8 @@ class TumblrIE(InfoExtractor):
'id': '159704441298',
'ext': 'mp4',
'title': 'md5:ba79365861101f4911452728d2950561',
'timestamp': 1492489550,
'upload_date': '20170418',
'description': 'md5:773738196cea76b6996ec71e285bdabc',
'uploader_id': 'jujanon',
'uploader_url': 'https://jujanon.tumblr.com/',
@@ -93,6 +99,8 @@ class TumblrIE(InfoExtractor):
'id': '180294460076',
'ext': 'mp4',
'title': 'duality of bird',
'timestamp': 1542651819,
'upload_date': '20181119',
'description': 'duality of bird',
'uploader_id': 'todaysbird',
'uploader_url': 'https://todaysbird.tumblr.com/',
@@ -238,6 +246,8 @@ class TumblrIE(InfoExtractor):
'info_dict': {
'id': '730460905855467520',
'uploader_id': 'felixcosm',
'upload_date': '20231006',
'timestamp': 1696621805,
'repost_count': int,
'tags': 'count:15',
'description': 'md5:2eb3482a3c6987280cbefb6839068f32',
@@ -327,6 +337,8 @@ class TumblrIE(InfoExtractor):
'url': 'https://www.tumblr.com/anyaboz/765332564457209856/my-music-video-for-selkie-by-nobodys-wolf-child',
'info_dict': {
'id': '765332564457209856',
'timestamp': 1729878010,
'upload_date': '20241025',
'uploader_id': 'anyaboz',
'repost_count': int,
'age_limit': 0,
@@ -445,6 +457,8 @@ class TumblrIE(InfoExtractor):
'uploader_id': uploader_id,
'uploader_url': f'https://{uploader_id}.tumblr.com/' if uploader_id else None,
**traverse_obj(post_json, {
# Try oldest post in reblog chain, fall back to timestamp of the post itself
'timestamp': ((('trail', 0, 'post'), None), 'timestamp', {int_or_none}, any),
'like_count': ('like_count', {int_or_none}),
'repost_count': ('reblog_count', {int_or_none}),
'tags': ('tags', ..., {str}),
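
For reference, a quick illustration of the branching traversal added above, using a hypothetical post JSON:

from yt_dlp.utils import int_or_none
from yt_dlp.utils.traversal import traverse_obj

post_json = {
    'timestamp': 1700000000,  # the reblog itself
    'trail': [{'post': {'timestamp': 1600000000}}],  # oldest post in the chain
}
print(traverse_obj(post_json, ((('trail', 0, 'post'), None), 'timestamp', {int_or_none}, any)))
# -> 1600000000: branches are tried in order and `any` keeps the first non-None hit
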

View File

@@ -1,14 +1,18 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
join_nonempty,
make_archive_id,
parse_age_limit,
smuggle_url,
try_get,
remove_end,
)
from ..utils.traversal import traverse_obj
class TV5UnisBaseIE(InfoExtractor):
_GEO_COUNTRIES = ['CA']
_GEO_BYPASS = False
def _real_extract(self, url):
groups = self._match_valid_url(url).groups()
@@ -16,96 +20,136 @@ class TV5UnisBaseIE(InfoExtractor):
'https://api.tv5unis.ca/graphql', groups[0], query={
'query': '''{
%s(%s) {
title
summary
tags
duration
seasonNumber
episodeNumber
collection {
title
}
episodeNumber
rating {
name
}
seasonNumber
tags
title
videoElement {
__typename
... on Video {
mediaId
encodings {
hls {
url
}
}
}
... on RestrictedVideo {
code
reason
}
}
}
}''' % (self._GQL_QUERY_NAME, self._gql_args(groups)), # noqa: UP031
})['data'][self._GQL_QUERY_NAME]
media_id = product['videoElement']['mediaId']
video = product['videoElement']
if video is None:
raise ExtractorError('This content is no longer available', expected=True)
if video.get('__typename') == 'RestrictedVideo':
code = video.get('code')
if code == 1001:
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
reason = video.get('reason')
raise ExtractorError(join_nonempty(
'This video is restricted',
code is not None and f', error code {code}',
reason and f': {remove_end(reason, ".")}',
delim=''))
media_id = video['mediaId']
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
video['encodings']['hls']['url'], media_id, 'mp4')
return {
'_type': 'url_transparent',
'id': media_id,
'title': product.get('title'),
'url': smuggle_url('limelight:media:' + media_id, {'geo_countries': self._GEO_COUNTRIES}),
'age_limit': parse_age_limit(try_get(product, lambda x: x['rating']['name'])),
'tags': product.get('tags'),
'series': try_get(product, lambda x: x['collection']['title']),
'season_number': int_or_none(product.get('seasonNumber')),
'episode_number': int_or_none(product.get('episodeNumber')),
'ie_key': 'LimelightMedia',
'_old_archive_ids': [make_archive_id('LimelightMedia', media_id)],
'formats': formats,
'subtitles': subtitles,
**traverse_obj(product, {
'title': ('title', {str}),
'description': ('summary', {str}),
'tags': ('tags', ..., {str}),
'duration': ('duration', {int_or_none}),
'season_number': ('seasonNumber', {int_or_none}),
'episode_number': ('episodeNumber', {int_or_none}),
'series': ('collection', 'title', {str}),
'age_limit': ('rating', 'name', {parse_age_limit}),
}),
}
class TV5UnisVideoIE(TV5UnisBaseIE):
_WORKING = False
IE_NAME = 'tv5unis:video'
_VALID_URL = r'https?://(?:www\.)?tv5unis\.ca/videos/[^/]+/(?P<id>\d+)'
_TEST = {
'url': 'https://www.tv5unis.ca/videos/bande-annonces/71843',
'md5': '3d794164928bda97fb87a17e89923d9b',
_VALID_URL = r'https?://(?:www\.)?tv5unis\.ca/videos/[^/?#]+/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.tv5unis.ca/videos/bande-annonces/144041',
'md5': '24a247c96119d77fe1bae8b440457dfa',
'info_dict': {
'id': 'a883684aecb2486cad9bdc7bbe17f861',
'id': '56862325352147149dce0ae139afced6',
'_old_archive_ids': ['limelightmedia 56862325352147149dce0ae139afced6'],
'ext': 'mp4',
'title': 'Watatatow',
'duration': 10.01,
'title': 'Antigone',
'description': r"re:En aidant son frère .+ dicté par l'amour et la solidarité.",
'duration': 61,
},
}
}]
_GQL_QUERY_NAME = 'productById'
@staticmethod
def _gql_args(groups):
return f'id: {groups}'
return f'id: {groups[0]}'
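
The _gql_args fix above matters because _match_valid_url(url).groups() always returns a tuple, even for a single capture group. A small illustration with a hypothetical ID:

groups = ('144041',)
print(f'id: {groups}')     # old: "id: ('144041',)" — malformed GraphQL argument
print(f'id: {groups[0]}')  # new: "id: 144041"
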
class TV5UnisIE(TV5UnisBaseIE):
_WORKING = False
IE_NAME = 'tv5unis'
_VALID_URL = r'https?://(?:www\.)?tv5unis\.ca/videos/(?P<id>[^/]+)(?:/saisons/(?P<season_number>\d+)/episodes/(?P<episode_number>\d+))?/?(?:[?#&]|$)'
_VALID_URL = r'https?://(?:www\.)?tv5unis\.ca/videos/(?P<id>[^/?#]+)(?:/saisons/(?P<season_number>\d+)/episodes/(?P<episode_number>\d+))?/?(?:[?#&]|$)'
_TESTS = [{
'url': 'https://www.tv5unis.ca/videos/watatatow/saisons/6/episodes/1',
'md5': 'a479907d2e531a73e1f8dc48d6388d02',
# geo-restricted to Canada; xff is ineffective
'url': 'https://www.tv5unis.ca/videos/watatatow/saisons/11/episodes/1',
'md5': '43beebd47eefb1c5caf9a47a3fc35589',
'info_dict': {
'id': 'e5ee23a586c44612a56aad61accf16ef',
'id': '2c06e4af20f0417b86c2536825287690',
'_old_archive_ids': ['limelightmedia 2c06e4af20f0417b86c2536825287690'],
'ext': 'mp4',
'title': 'Je ne peux pas lui résister',
'description': "Atys, le nouveau concierge de l'école, a réussi à ébranler la confiance de Mado en affirmant qu'une médaille, ce n'est que du métal. Comme Mado essaie de lui prouver que ses valeurs sont solides, il veut la mettre à l'épreuve...",
'title': "L'homme éléphant",
'description': r're:Paul-André et Jean-Yves, .+ quand elle parle du feu au Spot.',
'subtitles': {
'fr': 'count:1',
},
'duration': 1370,
'duration': 1440,
'age_limit': 8,
'tags': 'count:3',
'tags': 'count:4',
'series': 'Watatatow',
'season_number': 6,
'season': 'Season 11',
'season_number': 11,
'episode': 'Episode 1',
'episode_number': 1,
},
}, {
'url': 'https://www.tv5unis.ca/videos/le-voyage-de-fanny',
'md5': '9ca80ebb575c681d10cae1adff3d4774',
# geo-restricted to Canada; xff is ineffective
'url': 'https://www.tv5unis.ca/videos/boite-a-savon',
'md5': '7898e868e8c540f03844660e0aab6bbe',
'info_dict': {
'id': '726188eefe094d8faefb13381d42bc06',
'id': '4de6d0c6467b4511a0c04b92037a9f15',
'_old_archive_ids': ['limelightmedia 4de6d0c6467b4511a0c04b92037a9f15'],
'ext': 'mp4',
'title': 'Le voyage de Fanny',
'description': "Fanny, 12 ans, cachée dans un foyer loin de ses parents, s'occupe de ses deux soeurs. Devant fuir, Fanny prend la tête d'un groupe de huit enfants et s'engage dans un dangereux périple à travers la France occupée pour rejoindre la frontière suisse.",
'title': 'Boîte à savon',
'description': r're:Dans le petit village de Broche-à-foin, .+ celle qui fait battre son coeur.',
'subtitles': {
'fr': 'count:1',
},
'duration': 5587.034,
'tags': 'count:4',
'duration': 1200,
'tags': 'count:5',
},
}]
_GQL_QUERY_NAME = 'productByRootProductSlug'

View File

@@ -680,6 +680,10 @@ class TwitchPlaylistBaseIE(TwitchBaseIE):
}],
f'Downloading {self._NODE_KIND}s GraphQL page {page_num}',
fatal=False)
# Avoid extracting random/unrelated entries when channel_name doesn't exist
# See https://github.com/yt-dlp/yt-dlp/issues/15450
if traverse_obj(page, (0, 'data', 'user', 'id', {str})) == '':
raise ExtractorError(f'Channel "{channel_name}" not found', expected=True)
if not page:
break
edges = try_get(

View File

@@ -32,67 +32,11 @@ from ..utils.traversal import require, traverse_obj
class TwitterBaseIE(InfoExtractor):
_NETRC_MACHINE = 'twitter'
_API_BASE = 'https://api.x.com/1.1/'
_GRAPHQL_API_BASE = 'https://x.com/i/api/graphql/'
_BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:(?:twitter|x)\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
_AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
_LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
_flow_token = None
_LOGIN_INIT_DATA = json.dumps({
'input_flow_data': {
'flow_context': {
'debug_overrides': {},
'start_location': {
'location': 'unknown',
},
},
},
'subtask_versions': {
'action_list': 2,
'alert_dialog': 1,
'app_download_cta': 1,
'check_logged_in_account': 1,
'choice_selection': 3,
'contacts_live_sync_permission_prompt': 0,
'cta': 7,
'email_verification': 2,
'end_flow': 1,
'enter_date': 1,
'enter_email': 2,
'enter_password': 5,
'enter_phone': 2,
'enter_recaptcha': 1,
'enter_text': 5,
'enter_username': 2,
'generic_urt': 3,
'in_app_notification': 1,
'interest_picker': 3,
'js_instrumentation': 1,
'menu_dialog': 1,
'notifications_permission_prompt': 2,
'open_account': 2,
'open_home_timeline': 1,
'open_link': 1,
'phone_verification': 4,
'privacy_options': 1,
'security_key': 3,
'select_avatar': 4,
'select_banner': 2,
'settings_list': 7,
'show_code': 1,
'sign_up': 2,
'sign_up_review': 4,
'tweet_selection_urt': 1,
'update_users': 1,
'upload_media': 1,
'user_recommendations_list': 4,
'user_recommendations_urt': 1,
'wait_spinner': 3,
'web_modal': 1,
},
}, separators=(',', ':')).encode()
def _extract_variant_formats(self, variant, video_id):
variant_url = variant.get('url')
@@ -172,135 +116,6 @@ class TwitterBaseIE(InfoExtractor):
'x-csrf-token': try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value),
})
def _call_login_api(self, note, headers, query={}, data=None):
response = self._download_json(
f'{self._API_BASE}onboarding/task.json', None, note,
headers=headers, query=query, data=data, expected_status=400)
error = traverse_obj(response, ('errors', 0, 'message', {str}))
if error:
raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
elif traverse_obj(response, 'status') != 'success':
raise ExtractorError('Login was unsuccessful')
subtask = traverse_obj(
response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
if not subtask:
raise ExtractorError('Twitter API did not return next login subtask')
self._flow_token = response['flow_token']
return subtask
def _perform_login(self, username, password):
if self.is_logged_in:
return
guest_token = self._fetch_guest_token(None)
headers = {
**self._set_base_headers(),
'content-type': 'application/json',
'x-guest-token': guest_token,
'x-twitter-client-language': 'en',
'x-twitter-active-user': 'yes',
'Referer': 'https://x.com/',
'Origin': 'https://x.com',
}
def build_login_json(*subtask_inputs):
return json.dumps({
'flow_token': self._flow_token,
'subtask_inputs': subtask_inputs,
}, separators=(',', ':')).encode()
def input_dict(subtask_id, text):
return {
'subtask_id': subtask_id,
'enter_text': {
'text': text,
'link': 'next_link',
},
}
next_subtask = self._call_login_api(
'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)
while not self.is_logged_in:
if next_subtask == 'LoginJsInstrumentationSubtask':
next_subtask = self._call_login_api(
'Submitting JS instrumentation response', headers, data=build_login_json({
'subtask_id': next_subtask,
'js_instrumentation': {
'response': '{}',
'link': 'next_link',
},
}))
elif next_subtask == 'LoginEnterUserIdentifierSSO':
next_subtask = self._call_login_api(
'Submitting username', headers, data=build_login_json({
'subtask_id': next_subtask,
'settings_list': {
'setting_responses': [{
'key': 'user_identifier',
'response_data': {
'text_data': {
'result': username,
},
},
}],
'link': 'next_link',
},
}))
elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
next_subtask = self._call_login_api(
'Submitting alternate identifier', headers,
data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
'one of username, phone number or email that was not used as --username'))))
elif next_subtask == 'LoginEnterPassword':
next_subtask = self._call_login_api(
'Submitting password', headers, data=build_login_json({
'subtask_id': next_subtask,
'enter_password': {
'password': password,
'link': 'next_link',
},
}))
elif next_subtask == 'AccountDuplicationCheck':
next_subtask = self._call_login_api(
'Submitting account duplication check', headers, data=build_login_json({
'subtask_id': next_subtask,
'check_logged_in_account': {
'link': 'AccountDuplicationCheck_false',
},
}))
elif next_subtask == 'LoginTwoFactorAuthChallenge':
next_subtask = self._call_login_api(
'Submitting 2FA token', headers, data=build_login_json(input_dict(
next_subtask, self._get_tfa_info('two-factor authentication token'))))
elif next_subtask == 'LoginAcid':
next_subtask = self._call_login_api(
'Submitting confirmation code', headers, data=build_login_json(input_dict(
next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))
elif next_subtask == 'ArkoseLogin':
self.raise_login_required('Twitter is requiring captcha for this login attempt', method='cookies')
elif next_subtask == 'DenyLoginSubtask':
self.raise_login_required('Twitter rejected this login attempt as suspicious', method='cookies')
elif next_subtask == 'LoginSuccessSubtask':
raise ExtractorError('Twitter API did not grant auth token cookie')
else:
raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')
self.report_login()
def _call_api(self, path, video_id, query={}, graphql=False):
headers = self._set_base_headers(legacy=not graphql and self._selected_api == 'legacy')
headers.update({
@@ -416,6 +231,7 @@ class TwitterCardIE(InfoExtractor):
'live_status': 'not_live',
},
'add_ie': ['Youtube'],
'skip': 'The page does not exist',
},
{
'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
@@ -617,6 +433,7 @@ class TwitterIE(TwitterBaseIE):
'comment_count': int,
'_old_archive_ids': ['twitter 852138619213144067'],
},
'skip': 'Suspended',
}, {
'url': 'https://twitter.com/i/web/status/910031516746514432',
'info_dict': {
@@ -763,10 +580,10 @@ class TwitterIE(TwitterBaseIE):
'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
'info_dict': {
'id': '1577719286659006464',
'title': 'Ultima - Test',
'title': r're:Ultima.* - Test$',
'description': 'Test https://t.co/Y3KEZD7Dad',
'channel_id': '168922496',
'uploader': 'Ultima',
'uploader': r're:Ultima.*',
'uploader_id': 'UltimaShadowX',
'uploader_url': 'https://twitter.com/UltimaShadowX',
'upload_date': '20221005',
@@ -895,11 +712,12 @@ class TwitterIE(TwitterBaseIE):
'uploader': r're:Monique Camarra.+?',
'uploader_id': 'MoniqueCamarra',
'live_status': 'was_live',
'release_timestamp': 1658417414,
'release_timestamp': 1658417305,
'description': r're:Twitter Space participated by Sergej Sumlenny.+',
'timestamp': 1658407771,
'release_date': '20220721',
'upload_date': '20220721',
'thumbnail': 'https://pbs.twimg.com/profile_images/1920514378006188033/xQs6J_yI_400x400.jpg',
},
'add_ie': ['TwitterSpaces'],
'params': {'skip_download': 'm3u8'},
@@ -1010,10 +828,10 @@ class TwitterIE(TwitterBaseIE):
'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
'age_limit': 0,
'uploader': 'Boy Called Mün',
'uploader': 'D U N I Y A',
'repost_count': int,
'upload_date': '20221206',
'title': 'Boy Called Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
'title': 'D U N I Y A - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
'comment_count': int,
'like_count': int,
'tags': [],
@@ -1068,6 +886,7 @@ class TwitterIE(TwitterBaseIE):
'comment_count': int,
'_old_archive_ids': ['twitter 1695424220702888009'],
},
'skip': 'Suspended',
}, {
# retweeted_status w/ legacy API
'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
@@ -1092,6 +911,7 @@ class TwitterIE(TwitterBaseIE):
'_old_archive_ids': ['twitter 1695424220702888009'],
},
'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
'skip': 'Suspended',
}, {
# Broadcast embedded in tweet
'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
@@ -1135,7 +955,6 @@ class TwitterIE(TwitterBaseIE):
}, {
# "stale tweet" with typename "TweetWithVisibilityResults"
'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
'md5': '511377ff8dfa7545307084dca4dce319',
'info_dict': {
'id': '1724883339285544960',
'ext': 'mp4',
@@ -1182,6 +1001,30 @@ class TwitterIE(TwitterBaseIE):
'age_limit': 0,
'_old_archive_ids': ['twitter 1790637656616943991'],
},
}, {
# unified_card with 2 items: one video and one photo
'url': 'https://x.com/TopHeroes_/status/2001950365332455490',
'info_dict': {
'id': '2001841416071450628',
'ext': 'mp4',
'display_id': '2001950365332455490',
'title': 'Top Heroes - Forgot to close My heroes solo level up in my phone ✨Unlock the fog,...',
'description': r're:Forgot to close My heroes solo level up in my phone ✨Unlock the fog.+',
'uploader': 'Top Heroes',
'uploader_id': 'TopHeroes_',
'uploader_url': 'https://twitter.com/TopHeroes_',
'channel_id': '1737324725620326400',
'comment_count': int,
'like_count': int,
'repost_count': int,
'age_limit': 0,
'duration': 30.278,
'thumbnail': 'https://pbs.twimg.com/amplify_video_thumb/2001841416071450628/img/hpy5KpJh4pO17b65.jpg?name=orig',
'tags': [],
'timestamp': 1766137136,
'upload_date': '20251219',
'_old_archive_ids': ['twitter 2001950365332455490'],
},
}, {
# onion route
'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
@@ -1422,14 +1265,14 @@ class TwitterIE(TwitterBaseIE):
if not card:
return
self.write_debug(f'Extracting from card info: {card.get("url")}')
card_name = card['name'].split(':')[-1]
self.write_debug(f'Extracting from {card_name} card info: {card.get("url")}')
binding_values = card['binding_values']
def get_binding_value(k):
o = binding_values.get(k) or {}
return try_get(o, lambda x: x[x['type'].lower() + '_value'])
card_name = card['name'].split(':')[-1]
if card_name == 'player':
yield {
'_type': 'url',
@@ -1461,7 +1304,7 @@ class TwitterIE(TwitterBaseIE):
elif card_name == 'unified_card':
unified_card = self._parse_json(get_binding_value('unified_card'), twid)
yield from map(extract_from_video_info, traverse_obj(
unified_card, ('media_entities', ...), expected_type=dict))
unified_card, ('media_entities', lambda _, v: v['type'] == 'video')))
# amplify, promo_video_website, promo_video_convo, appplayer,
# video_direct_message, poll2choice_video, poll3choice_video,
# poll4choice_video, ...
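
A quick sketch of how the new predicate path filters the card's media entities (card data below is hypothetical):

from yt_dlp.utils.traversal import traverse_obj

unified_card = {'media_entities': {
    '111': {'type': 'video', 'media_url_https': 'https://video.example/1.mp4'},
    '222': {'type': 'photo', 'media_url_https': 'https://pbs.example/2.jpg'},
}}
print(traverse_obj(unified_card, ('media_entities', lambda _, v: v['type'] == 'video')))
# -> only the video entity; photo entries no longer reach extract_from_video_info
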

View File

@@ -13,55 +13,16 @@ from ..utils import (
parse_iso8601,
traverse_obj,
try_get,
update_url,
url_or_none,
)
class YahooIE(InfoExtractor):
IE_DESC = 'Yahoo screen and movies'
IE_NAME = 'yahoo'
_VALID_URL = r'(?P<url>https?://(?:(?P<country>[a-zA-Z]{2}(?:-[a-zA-Z]{2})?|malaysia)\.)?(?:[\da-zA-Z_-]+\.)?yahoo\.com/(?:[^/]+/)*(?P<id>[^?&#]*-[0-9]+(?:-[a-z]+)?)\.html)'
_EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1']
_TESTS = [{
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
'info_dict': {
'id': '2d25e626-2378-391f-ada0-ddaf1417e588',
'ext': 'mp4',
'title': 'Julian Smith & Travis Legg Watch Julian Smith',
'description': 'Julian and Travis watch Julian Smith',
'duration': 6863,
'timestamp': 1369812016,
'upload_date': '20130529',
},
'skip': 'No longer exists',
}, {
'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed',
'md5': '7993e572fac98e044588d0b5260f4352',
'info_dict': {
'id': '4fe78544-8d48-39d8-97cd-13f205d9fcdb',
'ext': 'mp4',
'title': "Yahoo Saves 'Community'",
'description': 'md5:4d4145af2fd3de00cbb6c1d664105053',
'duration': 170,
'timestamp': 1406838636,
'upload_date': '20140731',
},
'skip': 'Unfortunately, this video is not available in your region',
}, {
'url': 'https://uk.screen.yahoo.com/editor-picks/cute-raccoon-freed-drain-using-091756545.html',
'md5': '71298482f7c64cbb7fa064e4553ff1c1',
'info_dict': {
'id': 'b3affa53-2e14-3590-852b-0e0db6cd1a58',
'ext': 'webm',
'title': 'Cute Raccoon Freed From Drain\u00a0Using Angle Grinder',
'description': 'md5:f66c890e1490f4910a9953c941dee944',
'duration': 97,
'timestamp': 1414489862,
'upload_date': '20141028',
},
'skip': 'No longer exists',
}, {
'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
'url': 'https://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
'md5': '88e209b417f173d86186bef6e4d1f160',
'info_dict': {
'id': 'f885cf7f-43d4-3450-9fac-46ac30ece521',
@@ -69,27 +30,33 @@ class YahooIE(InfoExtractor):
'title': 'China Moses Is Crazy About the Blues',
'description': 'md5:9900ab8cd5808175c7b3fe55b979bed0',
'duration': 128,
'timestamp': 1385722202,
'timestamp': 1385721938,
'upload_date': '20131129',
'display_id': 'china-moses-crazy-blues-104538833',
'view_count': int,
'thumbnail': r're:https://media\.zenfs\.com/.+',
},
}, {
'url': 'https://www.yahoo.com/movies/v/true-story-trailer-173000497.html',
'md5': '2a9752f74cb898af5d1083ea9f661b58',
# 'md5': '989396ae73d20c6f057746fb226aa215', # varies between this and 'b17ac378b1134fa44370fb27db09a744'
'info_dict': {
'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1',
'ext': 'mp4',
'title': '\'True Story\' Trailer',
'description': 'True Story',
'duration': 150,
'timestamp': 1418919206,
'timestamp': 1418923800,
'upload_date': '20141218',
'display_id': 'true-story-trailer-173000497',
'view_count': int,
'thumbnail': r're:https://media\.zenfs\.com/.+\.jpg',
},
}, {
'url': 'https://gma.yahoo.com/pizza-delivery-man-surprised-huge-tip-college-kids-195200785.html',
'only_matching': True,
}, {
'note': 'NBC Sports embeds',
'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313',
'url': 'https://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313',
'info_dict': {
'id': '9CsDKds0kvHI',
'ext': 'flv',
@@ -99,26 +66,10 @@ class YahooIE(InfoExtractor):
'uploader': 'NBCU-SPORTS',
'timestamp': 1426270238,
},
'skip': 'Page no longer has video',
}, {
'url': 'https://tw.news.yahoo.com/-100120367.html',
'only_matching': True,
}, {
# Query result is embedded in webpage, but explicit request to video API fails with geo restriction
'url': 'https://screen.yahoo.com/community/communitary-community-episode-1-ladders-154501237.html',
'md5': '4fbafb9c9b6f07aa8f870629f6671b35',
'info_dict': {
'id': '1f32853c-a271-3eef-8cb6-f6d6872cb504',
'ext': 'mp4',
'title': 'Communitary - Community Episode 1: Ladders',
'description': 'md5:8fc39608213295748e1e289807838c97',
'duration': 1646,
'timestamp': 1440436550,
'upload_date': '20150824',
'series': 'Communitary',
'season_number': 6,
'episode_number': 1,
},
'skip': 'No longer exists',
}, {
# ytwnews://cavideo/
'url': 'https://tw.video.yahoo.com/movie-tw/單車天使-中文版預-092316541.html',
@@ -129,12 +80,16 @@ class YahooIE(InfoExtractor):
'description': '中文版預',
'timestamp': 1476696196,
'upload_date': '20161017',
'view_count': int,
'duration': 141,
'thumbnail': r're:https://media\.zenfs\.com/.+\.jpg',
'series': '電影',
'display_id': '單車天使-中文版預-092316541',
},
'params': {
'skip_download': True,
},
}, {
# Contains both a Yahoo hosted video and multiple Youtube embeds
'url': 'https://www.yahoo.com/entertainment/gwen-stefani-reveals-the-pop-hit-she-passed-on-assigns-it-to-her-voice-contestant-instead-033045672.html',
'info_dict': {
'id': '46c5d95a-528f-3d03-b732-732fcadd51de',
@@ -147,24 +102,29 @@ class YahooIE(InfoExtractor):
'ext': 'mp4',
'title': 'Gwen Stefani reveals she turned down one of Sia\'s best songs',
'description': 'On "The Voice" Tuesday, Gwen Stefani told Taylor Swift which Sia hit was almost hers.',
'timestamp': 1572406500,
'timestamp': 1572406499,
'upload_date': '20191030',
},
}, {
'info_dict': {
'id': '352CFDOQrKg',
'ext': 'mp4',
'title': 'Kyndal Inskeep "Performs the Hell Out of" Sia\'s "Elastic Heart" - The Voice Knockouts 2019',
'description': 'md5:7fe8e3d5806f96002e55f190d1d94479',
'uploader': 'The Voice',
'uploader_id': 'NBCTheVoice',
'upload_date': '20191029',
'display_id': 'gwen-stefani-reveals-she-turned-033459311',
'view_count': int,
'duration': 97,
'thumbnail': 'https://s.yimg.com/os/creatr-uploaded-images/2019-10/348bb330-fac6-11e9-8d27-38e85d573702',
'series': 'Last Night Now',
},
}],
'params': {
'playlistend': 2,
},
}, {
'url': 'https://sports.yahoo.com/video/rams-lose-grip-nfcs-top-174614409.html',
'info_dict': {
'id': '6b15f100-cf5c-3ad0-9c96-87cbd2f72d4a',
'ext': 'mp4',
'display_id': 'rams-lose-grip-nfcs-top-174614409',
'title': 'Rams lose their grip on NFC\'s top seed — can they still secure the bye?',
'description': 'md5:5f4f98ab3c4de80e54c105b6bbb1d024',
'view_count': int,
'duration': 85,
'thumbnail': 'https://s.yimg.com/os/creatr-uploaded-images/2025-12/94fc4840-dd02-11f0-beff-38ba3a4992e3',
'timestamp': 1766166374,
'upload_date': '20251219',
},
'expected_warnings': ['HTTP Error 404', 'Ignoring subtitle tracks'],
}, {
'url': 'https://malaysia.news.yahoo.com/video/bystanders-help-ontario-policeman-bust-190932818.html',
'only_matching': True,
@@ -178,14 +138,12 @@ class YahooIE(InfoExtractor):
def _extract_yahoo_video(self, video_id, country):
video = self._download_json(
f'https://{country}.yahoo.com/_td/api/resource/VideoService.videos;view=full;video_ids=["{video_id}"]',
video_id, 'Downloading video JSON metadata')[0]
title = video['title']
f'https://video-api.yql.yahoo.com/v1/video/sapi/streams/{video_id}',
video_id, 'Downloading video JSON metadata')['query']['results']['mediaObj'][0]['meta']
if country == 'malaysia':
country = 'my'
is_live = video.get('live_state') == 'live'
is_live = traverse_obj(video, ('uplynk_live', {bool})) is True
fmts = ('m3u8',) if is_live else ('webm', 'mp4')
urls = []
@@ -231,43 +189,23 @@ class YahooIE(InfoExtractor):
'ext': mimetype2ext(cc.get('content_type')),
})
streaming_url = video.get('streaming_url')
if streaming_url and not is_live:
formats.extend(self._extract_m3u8_formats(
streaming_url, video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))
if not formats and msg == 'geo restricted':
self.raise_geo_restricted(metadata_available=True)
thumbnails = []
for thumb in video.get('thumbnails', []):
thumb_url = thumb.get('url')
if not thumb_url:
continue
thumbnails.append({
'id': thumb.get('tag'),
'url': thumb.get('url'),
'width': int_or_none(thumb.get('width')),
'height': int_or_none(thumb.get('height')),
})
series_info = video.get('series_info') or {}
return {
'id': video_id,
'title': title,
'formats': formats,
'thumbnails': thumbnails,
'description': clean_html(video.get('description')),
'timestamp': parse_iso8601(video.get('publish_time')),
'subtitles': subtitles,
'duration': int_or_none(video.get('duration')),
'view_count': int_or_none(video.get('view_count')),
'is_live': is_live,
'series': video.get('show_name'),
'season_number': int_or_none(series_info.get('season_number')),
'episode_number': int_or_none(series_info.get('episode_number')),
**traverse_obj(video, {
'title': ('title', {clean_html}),
'description': ('description', {clean_html}),
'thumbnail': ('thumbnail', {url_or_none}, {update_url(scheme='https')}),
'timestamp': ('publish_time', {parse_iso8601}),
'duration': ('duration', {int_or_none}),
'view_count': ('view_count', {int_or_none}),
'series': ('show_name', {str}, filter),
}),
}
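
Note that update_url supports partial application when called with keyword arguments only, which is what makes the {update_url(scheme='https')} traversal step above work. A tiny sketch (sample URL is hypothetical):

from yt_dlp.utils import update_url

to_https = update_url(scheme='https')  # keyword-only call returns a partial
print(to_https('http://media.zenfs.com/thumb.jpg'))  # https://media.zenfs.com/thumb.jpg
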
def _real_extract(self, url):
@@ -321,14 +259,13 @@ class YahooIE(InfoExtractor):
class YahooSearchIE(SearchInfoExtractor):
IE_DESC = 'Yahoo screen search'
_MAX_RESULTS = 1000
IE_NAME = 'screen.yahoo:search'
IE_NAME = 'yahoo:search'
_SEARCH_KEY = 'yvsearch'
def _search_results(self, query):
for pagenum in itertools.count(0):
result_url = f'http://video.search.yahoo.com/search/?p={urllib.parse.quote_plus(query)}&fr=screen&o=js&gs=0&b={pagenum * 30}'
result_url = f'https://video.search.yahoo.com/search/?p={urllib.parse.quote_plus(query)}&fr=screen&o=js&gs=0&b={pagenum * 30}'
info = self._download_json(result_url, query,
note='Downloading results page ' + str(pagenum + 1))
yield from (self.url_result(result['rurl']) for result in info['results'])

View File

@@ -1065,7 +1065,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
return next_continuation
return traverse_obj(renderer, (
('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
('contents', 'items', 'rows', 'subThreads'), ..., 'continuationItemRenderer',
('continuationEndpoint', ('button', 'buttonRenderer', 'command')),
), get_all=False, expected_type=cls._extract_continuation_ep_data)

View File

@@ -382,7 +382,8 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
('accessibilityText', {lambda x: re.fullmatch(r'(.+), (?:[\d,.]+(?:[KM]| million)?|No) views? - play Short', x)}, 1)), any),
'view_count': ('overlayMetadata', 'secondaryText', 'content', {parse_count}),
}),
thumbnails=self._extract_thumbnails(renderer, 'thumbnail', final_key='sources'))
thumbnails=self._extract_thumbnails(
renderer, ('thumbnailViewModel', 'thumbnailViewModel', 'image'), final_key='sources'))
return
def _video_entry(self, video_renderer):
@@ -1585,7 +1586,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_count': 50,
'expected_warnings': ['YouTube Music is not directly supported'],
}, {
# TODO: fix test suite, 208163447408c78673b08c172beafe5c310fb167 broke this test
'note': 'unlisted single video playlist',
'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQLfIN0MMgp0wVV6MP3bM4_',
'info_dict': {
@@ -1885,8 +1885,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_mincount': 30,
}, {
# Shorts url result in shorts tab
# TODO: Fix channel id extraction
# TODO: fix test suite, 208163447408c78673b08c172beafe5c310fb167 broke this test
'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
'info_dict': {
'id': 'UCiu-3thuViMebBjw_5nWYrA',
@@ -1915,7 +1913,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'params': {'extract_flat': True},
}, {
# Live video status should be extracted
# TODO: fix test suite, 208163447408c78673b08c172beafe5c310fb167 broke this test
'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
'info_dict': {
'id': 'UCQvWX73GQygcwXOTSf_VDVg',

View File

@@ -1660,6 +1660,71 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'live_status': 'not_live',
},
'params': {'skip_download': True},
}, {
# Comment subthreads with 4 levels of depth
'url': 'https://www.youtube.com/watch?v=f6HNySwZV4c',
'info_dict': {
'id': 'f6HNySwZV4c',
'ext': 'mp4',
'title': 'dlptestvideo2',
'description': '',
'media_type': 'video',
'uploader': 'cole-dlp-test-acc',
'uploader_id': '@coletdjnz',
'uploader_url': 'https://www.youtube.com/@coletdjnz',
'channel': 'cole-dlp-test-acc',
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
'channel_follower_count': int,
'view_count': int,
'like_count': int,
'age_limit': 0,
'duration': 5,
'thumbnail': 'https://i.ytimg.com/vi/f6HNySwZV4c/maxresdefault.jpg',
'categories': ['People & Blogs'],
'tags': [],
'timestamp': 1709856007,
'upload_date': '20240308',
'release_timestamp': 1709856007,
'release_date': '20240308',
'playable_in_embed': True,
'availability': 'public',
'live_status': 'not_live',
'comment_count': 15, # XXX: minimum
},
'params': {
'skip_download': True,
'getcomments': True,
},
}, {
# Comments: `subThreads` containing `commentThreadRenderer`s AND `continuationItemRenderer`
'url': 'https://www.youtube.com/watch?v=3dHQb2Nhma0',
'info_dict': {
'id': '3dHQb2Nhma0',
'ext': 'mp4',
'title': 'Tɪtle',
'description': '',
'media_type': 'video',
'uploader': 'abcdefg',
'uploader_id': '@abcdefg-d5t2c',
'uploader_url': 'https://www.youtube.com/@abcdefg-d5t2c',
'channel': 'abcdefg',
'channel_id': 'UCayEJzV8XSSJkPdA7OAsbew',
'channel_url': 'https://www.youtube.com/channel/UCayEJzV8XSSJkPdA7OAsbew',
'view_count': int,
'like_count': int,
'age_limit': 0,
'duration': 12,
'thumbnail': 'https://i.ytimg.com/vi/3dHQb2Nhma0/maxresdefault.jpg',
'categories': ['People & Blogs'],
'tags': [],
'timestamp': 1767158812,
'upload_date': '20251231',
'playable_in_embed': True,
'availability': 'unlisted',
'live_status': 'not_live',
'comment_count': 9, # XXX: minimum
},
}]
_WEBPAGE_TESTS = [{
# <object>
@@ -2402,7 +2467,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return info
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None, depth=1):
get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]
@@ -2434,11 +2499,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
break
return _continuation
def extract_thread(contents, entity_payloads):
if not parent:
def extract_thread(contents, entity_payloads, thread_parent, thread_depth):
if not thread_parent:
tracker['current_page_thread'] = 0
if max_depth < thread_depth:
return
for content in contents:
if not parent and tracker['total_parent_comments'] >= max_parents:
if not thread_parent and tracker['total_parent_comments'] >= max_parents:
yield
comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
@@ -2448,7 +2517,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
(comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
expected_type=dict, default={})
comment = self._extract_comment_old(comment_renderer, parent)
comment = self._extract_comment_old(comment_renderer, thread_parent)
# new comment format
else:
@@ -2459,7 +2528,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if not comment_keys:
continue
entities = traverse_obj(entity_payloads, lambda _, v: v['entityKey'] in comment_keys)
comment = self._extract_comment(entities, parent)
comment = self._extract_comment(entities, thread_parent)
if comment:
comment['is_pinned'] = traverse_obj(view_model, ('pinnedText', {str})) is not None
@@ -2478,13 +2547,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
continue
self.report_warning(
'Detected YouTube comments looping. Stopping comment extraction '
f'{"for this thread" if parent else ""} as we probably cannot get any more.')
f'{"for this thread" if thread_parent else ""} as we probably cannot get any more.')
yield
break # Safeguard for recursive call in subthreads code path below
else:
tracker['seen_comment_ids'].add(comment['id'])
tracker['seen_comment_ids'].add(comment_id)
tracker['running_total'] += 1
tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
tracker['total_reply_comments' if thread_parent else 'total_parent_comments'] += 1
yield comment
# Attempt to get the replies
@@ -2492,10 +2562,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
if comment_replies_renderer:
subthreads = traverse_obj(comment_replies_renderer, ('subThreads', ..., {dict}))
# Recursively extract from `commentThreadRenderer`s in `subThreads`
if threads := traverse_obj(subthreads, lambda _, v: v['commentThreadRenderer']):
for entry in extract_thread(threads, entity_payloads, comment_id, thread_depth + 1):
if entry:
yield entry
if not traverse_obj(subthreads, lambda _, v: v['continuationItemRenderer']):
# All of the subThreads' `continuationItemRenderer`s were within the nested
# `commentThreadRenderer`s and are now exhausted, so avoid unnecessary recursion below
continue
tracker['current_page_thread'] += 1
# Recursively extract from `continuationItemRenderer` in `subThreads`
comment_entries_iter = self._comment_entries(
comment_replies_renderer, ytcfg, video_id,
parent=comment.get('id'), tracker=tracker)
parent=comment_id, tracker=tracker, depth=thread_depth + 1)
yield from itertools.islice(comment_entries_iter, min(
max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))
@@ -2511,17 +2593,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'pinned_comment_ids': set(),
}
# TODO: Deprecated
# YouTube comments have a max depth of 2
max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
if max_depth:
self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
'Set max replies in the max-comments extractor argument instead')
if max_depth == 1 and parent:
return
_max_comments, max_parents, max_replies, max_replies_per_thread, max_depth, *_ = (
int_or_none(p, default=sys.maxsize) for p in self._configuration_arg('max_comments') + [''] * 5)
_max_comments, max_parents, max_replies, max_replies_per_thread, *_ = (
int_or_none(p, default=sys.maxsize) for p in self._configuration_arg('max_comments') + [''] * 4)
if max_depth < depth:
return
continuation = self._extract_continuation(root_continuation_data)
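
For reference, a sketch of how the four comma-separated limits above are unpacked, e.g. via --extractor-args "youtube:max_comments=100,20,all,5" (values are hypothetical):

import sys

from yt_dlp.utils import int_or_none

args = '100,20,all,5'.split(',')
max_comments, max_parents, max_replies, max_replies_per_thread, *_ = (
    int_or_none(p, default=sys.maxsize) for p in args + [''] * 4)
print(max_comments, max_parents, max_replies, max_replies_per_thread)
# -> 100 20 9223372036854775807 5  ('all' and blank fields fall back to sys.maxsize)
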
@@ -2550,6 +2626,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
tracker['current_page_thread'], comment_prog_str)
else:
# TODO: `parent` is only truthy in this code path with YT's legacy (non-threaded) comment view
note_prefix = '{}Downloading comment{} API JSON page {} {}'.format(
' ' if parent else '', ' replies' if parent else '',
page_num, comment_prog_str)
@@ -2566,6 +2643,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
check_get_keys=check_get_keys)
except ExtractorError as e:
# TODO: This code path is not reached since eb5bdbfa70126c7d5355cc0954b63720522e462c
# Ignore incomplete data error for replies if retries didn't work.
# This is to allow any other parent comments and comment threads to be downloaded.
# See: https://github.com/yt-dlp/yt-dlp/issues/4669
@@ -2592,7 +2670,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
break
continue
for entry in extract_thread(continuation_items, mutations):
for entry in extract_thread(continuation_items, mutations, parent, depth):
if not entry:
return
yield entry
@@ -3307,6 +3385,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def process_https_formats():
proto = 'https'
https_fmts = []
skip_player_js = 'js' in self._configuration_arg('player_skip')
for fmt_stream in streaming_formats:
if fmt_stream.get('targetDurationSec'):
continue
@@ -3344,13 +3424,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
sc = urllib.parse.parse_qs(fmt_stream.get('signatureCipher'))
fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
encrypted_sig = try_get(sc, lambda x: x['s'][0])
if not all((sc, fmt_url, player_url, encrypted_sig)):
msg = f'Some {client_name} client https formats have been skipped as they are missing a url. '
if not all((sc, fmt_url, skip_player_js or player_url, encrypted_sig)):
msg = f'Some {client_name} client https formats have been skipped as they are missing a URL. '
if client_name in ('web', 'web_safari'):
msg += 'YouTube is forcing SABR streaming for this client. '
else:
msg += (
f'YouTube may have enabled the SABR-only or Server-Side Ad Placement experiment for '
f'YouTube may have enabled the SABR-only streaming experiment for '
f'{"your account" if self.is_authenticated else "the current session"}. '
)
msg += 'See https://github.com/yt-dlp/yt-dlp/issues/12482 for more details'
@@ -3366,6 +3446,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# signature
# Attempt to load sig spec from cache
if encrypted_sig:
if skip_player_js:
continue
spec_cache_id = self._sig_spec_cache_id(player_url, len(encrypted_sig))
spec = self._load_sig_spec_from_cache(spec_cache_id)
if spec:
@@ -3379,6 +3461,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# n challenge
query = parse_qs(fmt_url)
if query.get('n'):
if skip_player_js:
continue
n_challenge = query['n'][0]
if n_challenge in self._player_cache:
fmt_url = update_url_query(fmt_url, {'n': self._player_cache[n_challenge]})
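
The skip_player_js flag above is driven by the existing player_skip extractor argument; a minimal sketch of the equivalent Python API usage (the URL is an arbitrary example):

import yt_dlp

# With player JS skipped, ciphered/n-challenged formats are dropped instead of solved
opts = {'extractor_args': {'youtube': {'player_skip': ['js']}}}
with yt_dlp.YoutubeDL(opts) as ydl:
    info = ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc', download=False)
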

View File

@@ -127,6 +127,7 @@ class ZDFBaseIE(InfoExtractor):
**parse_codecs(quality.get('mimeCodec')),
'height': height,
'width': width,
'filesize': int_or_none(variant.get('filesize')),
'format_id': join_nonempty('http', stream.get('type')),
'tbr': int_or_none(self._search_regex(r'_(\d+)k_', format_url, 'tbr', default=None)),
}]
@@ -193,8 +194,10 @@ class ZDFBaseIE(InfoExtractor):
class ZDFIE(ZDFBaseIE):
_VALID_URL = [
r'https?://(?:www\.)?zdf\.de/(?:video|play)/(?:[^/?#]+/)*(?P<id>[^/?#]+)',
# /nachrichten/ sub-site URLs and legacy redirects from before the redesign in 2025-03
# Legacy redirects from before the redesign in 2025-03 or from before sister sites moved to their own domains
r'https?://(?:www\.)?zdf\.de/(?:[^/?#]+/)*(?P<id>[^/?#]+)\.html',
# Sister sites
r'https?://(?:www\.)?(?:zdfheute|logo)\.de/(?:[^/?#]+/)*(?P<id>[^/?#]+)\.html',
]
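
A quick check of the added sister-site pattern against hypothetical URLs:

import re

pattern = r'https?://(?:www\.)?(?:zdfheute|logo)\.de/(?:[^/?#]+/)*(?P<id>[^/?#]+)\.html'
for test_url in ('https://www.zdfheute.de/politik/beispiel-video-100.html',
                 'https://www.logo.de/beispiel-clip-100.html'):
    print(re.match(pattern, test_url)['id'])  # beispiel-video-100, beispiel-clip-100
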
IE_NAME = 'zdf'
_TESTS = [{
@@ -206,7 +209,7 @@ class ZDFIE(ZDFBaseIE):
'title': 'Sylt - Deutschlands edles Nordlicht',
'description': 'md5:35407b810c2e1e33efbe15ef6e4c06c3',
'duration': 810.0,
'thumbnail': 'https://www.zdf.de/assets/sylt-118~original?cb=1613992485011',
'thumbnail': r're:https://www\.zdf\.de/assets/sylt-118~original\?cb=\d+',
'series': 'Sylt - Deutschlands edles Nordlicht',
'series_id': 'sylt---deutschlands-edles-nordlicht-movie-100',
'timestamp': 1612462500,
@@ -222,7 +225,7 @@ class ZDFIE(ZDFBaseIE):
'title': 'Sylt - Deutschlands edles Nordlicht',
'description': 'md5:35407b810c2e1e33efbe15ef6e4c06c3',
'duration': 810.0,
'thumbnail': 'https://www.zdf.de/assets/sylt-118~original?cb=1613992485011',
'thumbnail': r're:https://www\.zdf\.de/assets/sylt-118~original\?cb=\d+',
'series': 'Sylt - Deutschlands edles Nordlicht',
'series_id': 'sylt---deutschlands-edles-nordlicht-movie-100',
'timestamp': 1612462500,
@@ -239,7 +242,7 @@ class ZDFIE(ZDFBaseIE):
'title': 'Sylt - Deutschlands edles Nordlicht',
'description': 'md5:35407b810c2e1e33efbe15ef6e4c06c3',
'duration': 810.0,
- 'thumbnail': 'https://www.zdf.de/assets/sylt-118~original?cb=1613992485011',
+ 'thumbnail': r're:https://www\.zdf\.de/assets/sylt-118~original\?cb=\d+',
'series': 'Sylt - Deutschlands edles Nordlicht',
'series_id': 'sylt---deutschlands-edles-nordlicht-movie-100',
'timestamp': 1612462500,
@@ -249,67 +252,66 @@ class ZDFIE(ZDFBaseIE):
'params': {'skip_download': True},
}, {
# Video belongs to a playlist, video URL
- 'url': 'https://www.zdf.de/video/dokus/die-magie-der-farben-116/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100',
- 'md5': '1eda17eb40a9ead3046326e10b9c5973',
+ # Also: video mirrored from ARD Mediathek
+ 'url': 'https://www.zdf.de/video/dokus/collection-index-page-ard-collection-ard-dxjuomfyzdpzag93ojy2mzhhmmq3mzk2ztq4nda-132/page-video-ard-gelb-vom-hellen-glanz-zu-finsteren-abgruenden-102',
+ 'md5': '84980c1a0148da6cd94de58333d7e1ee',
'info_dict': {
- 'id': 'die-magie-der-farben-von-koenigspurpur-und-jeansblau-100',
+ 'id': 'page-video-ard-gelb-vom-hellen-glanz-zu-finsteren-abgruenden-102',
'ext': 'mp4',
- 'title': 'Von Königspurpur bis Jeansblau',
- 'description': 'md5:a89da10c928c6235401066b60a6d5c1a',
- 'duration': 2615.0,
- 'thumbnail': 'https://www.zdf.de/assets/koenigspurpur-bis-jeansblau-100~original?cb=1741857765971',
+ 'title': 'Gelb: Vom hellen Glanz zu finsteren Abgründen',
+ 'description': 'md5:9aad4806b4c8ea152ab21e70c9d516be',
+ 'duration': 895.0,
+ 'thumbnail': r're:https://www\.zdf\.de/assets/image-ard-gelb-vom-hellen-glanz-zu-finsteren-abgruenden-102~original\?cb=\d+',
- 'series': 'Die Magie der Farben',
- 'series_id': 'die-magie-der-farben-116',
- 'season': 'Season 1',
- 'season_number': 1,
- 'episode': 'Episode 2',
- 'episode_number': 2,
- 'timestamp': 1445797800,
- 'upload_date': '20151025',
- '_old_archive_ids': ['zdf 151025_magie_farben2_tex'],
+ 'series_id': 'collection-index-page-ard-collection-ard-dxjuomfyzdpzag93ojy2mzhhmmq3mzk2ztq4nda-132',
+ 'season': 'Season 2023',
+ 'season_number': 2023,
+ 'episode': 'Episode 5',
+ 'episode_number': 5,
+ 'timestamp': 1690902120,
+ 'upload_date': '20230801',
+ '_old_archive_ids': ['zdf video_ard_dXJuOmFyZDpwdWJsaWNhdGlvbjo0YTYyOTJjM2Q0ZThlNmY1'],
},
}, {
# Video belongs to a playlist, play URL
- 'url': 'https://www.zdf.de/play/dokus/die-magie-der-farben-116/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100',
- 'md5': '1eda17eb40a9ead3046326e10b9c5973',
+ 'url': 'https://www.zdf.de/play/dokus/collection-index-page-ard-collection-ard-dxjuomfyzdpzag93ojy2mzhhmmq3mzk2ztq4nda-132/page-video-ard-gelb-vom-hellen-glanz-zu-finsteren-abgruenden-102',
'info_dict': {
- 'id': 'die-magie-der-farben-von-koenigspurpur-und-jeansblau-100',
+ 'id': 'page-video-ard-gelb-vom-hellen-glanz-zu-finsteren-abgruenden-102',
'ext': 'mp4',
- 'title': 'Von Königspurpur bis Jeansblau',
- 'description': 'md5:a89da10c928c6235401066b60a6d5c1a',
- 'duration': 2615.0,
- 'thumbnail': 'https://www.zdf.de/assets/koenigspurpur-bis-jeansblau-100~original?cb=1741857765971',
+ 'title': 'Gelb: Vom hellen Glanz zu finsteren Abgründen',
+ 'description': 'md5:9aad4806b4c8ea152ab21e70c9d516be',
+ 'duration': 895.0,
+ 'thumbnail': r're:https://www\.zdf\.de/assets/image-ard-gelb-vom-hellen-glanz-zu-finsteren-abgruenden-102~original\?cb=\d+',
- 'series': 'Die Magie der Farben',
- 'series_id': 'die-magie-der-farben-116',
- 'season': 'Season 1',
- 'season_number': 1,
- 'episode': 'Episode 2',
- 'episode_number': 2,
- 'timestamp': 1445797800,
- 'upload_date': '20151025',
- '_old_archive_ids': ['zdf 151025_magie_farben2_tex'],
+ 'series_id': 'collection-index-page-ard-collection-ard-dxjuomfyzdpzag93ojy2mzhhmmq3mzk2ztq4nda-132',
+ 'season': 'Season 2023',
+ 'season_number': 2023,
+ 'episode': 'Episode 5',
+ 'episode_number': 5,
+ 'timestamp': 1690902120,
+ 'upload_date': '20230801',
+ '_old_archive_ids': ['zdf video_ard_dXJuOmFyZDpwdWJsaWNhdGlvbjo0YTYyOTJjM2Q0ZThlNmY1'],
},
'params': {'skip_download': True},
}, {
# Video belongs to a playlist, legacy URL before website redesign in 2025-03
- 'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',
- 'md5': '1eda17eb40a9ead3046326e10b9c5973',
+ 'url': 'https://www.zdf.de/dokus/collection-index-page-ard-collection-ard-dxjuomfyzdpzag93ojy2mzhhmmq3mzk2ztq4nda-132/page-video-ard-gelb-vom-hellen-glanz-zu-finsteren-abgruenden-102.html',
'info_dict': {
- 'id': 'die-magie-der-farben-von-koenigspurpur-und-jeansblau-100',
+ 'id': 'page-video-ard-gelb-vom-hellen-glanz-zu-finsteren-abgruenden-102',
'ext': 'mp4',
- 'title': 'Von Königspurpur bis Jeansblau',
- 'description': 'md5:a89da10c928c6235401066b60a6d5c1a',
- 'duration': 2615.0,
- 'thumbnail': 'https://www.zdf.de/assets/koenigspurpur-bis-jeansblau-100~original?cb=1741857765971',
+ 'title': 'Gelb: Vom hellen Glanz zu finsteren Abgründen',
+ 'description': 'md5:9aad4806b4c8ea152ab21e70c9d516be',
+ 'duration': 895.0,
+ 'thumbnail': r're:https://www\.zdf\.de/assets/image-ard-gelb-vom-hellen-glanz-zu-finsteren-abgruenden-102~original\?cb=\d+',
- 'series': 'Die Magie der Farben',
- 'series_id': 'die-magie-der-farben-116',
- 'season': 'Season 1',
- 'season_number': 1,
- 'episode': 'Episode 2',
- 'episode_number': 2,
- 'timestamp': 1445797800,
- 'upload_date': '20151025',
- '_old_archive_ids': ['zdf 151025_magie_farben2_tex'],
+ 'series_id': 'collection-index-page-ard-collection-ard-dxjuomfyzdpzag93ojy2mzhhmmq3mzk2ztq4nda-132',
+ 'season': 'Season 2023',
+ 'season_number': 2023,
+ 'episode': 'Episode 5',
+ 'episode_number': 5,
+ 'timestamp': 1690902120,
+ 'upload_date': '20230801',
+ '_old_archive_ids': ['zdf video_ard_dXJuOmFyZDpwdWJsaWNhdGlvbjo0YTYyOTJjM2Q0ZThlNmY1'],
},
'params': {'skip_download': True},
}, {
@@ -337,7 +339,50 @@ class ZDFIE(ZDFBaseIE):
'_old_archive_ids': ['zdf 211219_sendung_hjo'],
},
}, {
- # Video that requires fallback extraction
+ # FUNK video (hosted on a different CDN, has atypical PTMD and HLS files)
+ 'url': 'https://www.zdf.de/video/serien/funk-collection-funk-11790-1596/funk-alles-ist-verzaubert-102',
+ 'md5': '57af4423db0455a3975d2dc4578536bc',
+ 'info_dict': {
+ 'id': 'funk-alles-ist-verzaubert-102',
+ 'ext': 'mp4',
+ 'title': 'Alles ist verzaubert',
+ 'description': 'Die Neue an der Schule verdreht Ismail den Kopf.',
+ 'duration': 1278.0,
+ 'thumbnail': r're:https://www\.zdf\.de/assets/teaser-funk-alles-ist-verzaubert-102~original\?cb=\d+',
+ 'series': 'DRUCK',
+ 'series_id': 'funk-collection-funk-11790-1596',
+ 'season': 'Season 2021',
+ 'season_number': 2021,
+ 'episode': 'Episode 50',
+ 'episode_number': 50,
+ 'timestamp': 1635520560,
+ 'upload_date': '20211029',
+ '_old_archive_ids': ['zdf video_funk_1770473'],
+ },
+ }, {
+ # zdfheute video, also available on zdf.de
+ 'url': 'https://www.zdfheute.de/video/heute-journal/heute-journal-vom-19-dezember-2025-100.html',
+ 'md5': '47af8c2cfa30abf74501170f62754c63',
+ 'info_dict': {
+ 'id': 'heute-journal-vom-19-dezember-2025-100',
+ 'ext': 'mp4',
+ 'title': 'heute journal vom 19. Dezember 2025',
+ 'description': 'md5:fd0dfbce0783486db839ff9140a8074b',
+ 'duration': 1780.0,
+ 'thumbnail': 'https://epg-image.zdf.de/fotobase-webdelivery/images/273e5545-16e7-4ca3-898e-52fe9e06d964?layout=2400x1350',
+ 'chapters': 'count:10',
+ 'series': 'heute journal',
+ 'series_id': 'heute-journal-104',
+ 'season': 'Season 2025',
+ 'season_number': 2025,
+ 'episode': 'Episode 365',
+ 'episode_number': 365,
+ 'timestamp': 1766178000,
+ 'upload_date': '20251219',
+ '_old_archive_ids': ['zdf 251219_2200_sendung_hjo'],
+ },
+ }, {
+ # zdfheute video, not available on zdf.de (uses the fallback extraction path)
'url': 'https://www.zdf.de/nachrichten/politik/deutschland/koalitionsverhandlungen-spd-cdu-csu-dobrindt-100.html',
'md5': 'c3a78514dd993a5781aa3afe50db51e2',
'info_dict': {
@@ -346,71 +391,50 @@ class ZDFIE(ZDFBaseIE):
'title': 'Dobrindt schließt Steuererhöhungen aus',
'description': 'md5:9a117646d7b8df6bc902eb543a9c9023',
'duration': 325,
- 'thumbnail': 'https://www.zdfheute.de/assets/dobrindt-csu-berlin-direkt-100~1920x1080?cb=1743357653736',
+ 'thumbnail': r're:https://www\.zdfheute\.de/assets/dobrindt-csu-berlin-direkt-100~1920x1080\?cb=\d+',
'timestamp': 1743374520,
'upload_date': '20250330',
'_old_archive_ids': ['zdf 250330_clip_2_bdi'],
},
}, {
- # FUNK video (hosted on a different CDN, has atypical PTMD and HLS files)
- 'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html',
- 'md5': '57af4423db0455a3975d2dc4578536bc',
+ # logo! video, also available on zdf.de
+ 'url': 'https://www.logo.de/logo-vom-freitag-19-dezember-2025-102.html',
+ 'md5': 'cfb1a0988b1249f052a437a55851134b',
'info_dict': {
- 'id': 'funk-alles-ist-verzaubert-102',
+ 'id': 'logo-vom-freitag-19-dezember-2025-102',
'ext': 'mp4',
- 'title': 'Alles ist verzaubert',
- 'description': 'Die Neue an der Schule verdreht Ismail den Kopf.',
- 'duration': 1278.0,
- 'thumbnail': 'https://www.zdf.de/assets/teaser-funk-alles-ist-verzaubert-102~original?cb=1663848412907',
- 'series': 'DRUCK',
- 'series_id': 'funk-collection-funk-11790-1590',
- 'season': 'Season 7',
- 'season_number': 7,
- 'episode': 'Episode 1',
- 'episode_number': 1,
- 'timestamp': 1635520560,
- 'upload_date': '20211029',
- '_old_archive_ids': ['zdf video_funk_1770473'],
+ 'title': 'logo! vom Freitag, 19. Dezember 2025',
+ 'description': 'md5:971428cb563e924c153580f23870c613',
+ 'duration': 490.0,
+ 'thumbnail': r're:https://www\.zdf\.de/assets/iran-rote-erde-sendung-19-dezember-2025-100~original\?cb=\d+',
+ 'chapters': 'count:7',
+ 'series': 'logo!',
+ 'series_id': 'logo-154',
+ 'season': 'Season 2025',
+ 'season_number': 2025,
+ 'episode': 'Episode 382',
+ 'episode_number': 382,
+ 'timestamp': 1766168700,
+ 'upload_date': '20251219',
+ '_old_archive_ids': ['zdf 251219_1925_sendung_log'],
},
}, {
- 'url': 'https://www.zdf.de/serien/soko-stuttgart/das-geld-anderer-leute-100.html',
+ # logo! video, not available on zdf.de (uses the fallback extraction path)
+ 'url': 'https://www.logo.de/kinderreporter-vivaan-trifft-alina-grijseels-100.html',
+ 'md5': '094cea026babb67aa25fd0108400bc12',
'info_dict': {
- 'id': 'das-geld-anderer-leute-100',
+ 'id': 'kinderreporter-vivaan-trifft-alina-grijseels-100',
'ext': 'mp4',
- 'title': 'Das Geld anderer Leute',
- 'description': 'md5:cb6f660850dc5eb7d1ab776ea094959d',
- 'duration': 2581.0,
- 'thumbnail': 'https://epg-image.zdf.de/fotobase-webdelivery/images/e2d7e55a-09f0-424e-ac73-6cac4dd65f35?layout=1920x1080',
- 'series': 'SOKO Stuttgart',
- 'series_id': 'soko-stuttgart-104',
- 'season': 'Season 11',
- 'season_number': 11,
- 'episode': 'Episode 10',
- 'episode_number': 10,
- 'timestamp': 1728983700,
- 'upload_date': '20241015',
- '_old_archive_ids': ['zdf 191205_1800_sendung_sok8'],
+ 'title': 'Vivaan trifft Handballerin Alina Grijseels',
+ 'description': 'md5:9572e7f4340dda823ea4091a76624da6',
+ 'duration': 166.0,
+ 'thumbnail': r're:https://www\.zdf\.de/assets/vivaan-alina-grijseels-100~original\?cb=\d+',
+ 'series': 'logo!',
+ 'series_id': 'logo-154',
+ 'timestamp': 1766236320,
+ 'upload_date': '20251220',
+ '_old_archive_ids': ['zdf 251219_kr_alina_grijseels_neu_log'],
},
}, {
- 'url': 'https://www.zdf.de/serien/northern-lights/begegnung-auf-der-bruecke-100.html',
- 'info_dict': {
- 'id': 'begegnung-auf-der-bruecke-100',
- 'ext': 'webm',
- 'title': 'Begegnung auf der Brücke',
- 'description': 'md5:e53a555da87447f7f1207f10353f8e45',
- 'duration': 3083.0,
- 'thumbnail': 'https://epg-image.zdf.de/fotobase-webdelivery/images/c5ff1d1f-f5c8-4468-86ac-1b2f1dbecc76?layout=1920x1080',
- 'series': 'Northern Lights',
- 'series_id': 'northern-lights-100',
- 'season': 'Season 1',
- 'season_number': 1,
- 'episode': 'Episode 1',
- 'episode_number': 1,
- 'timestamp': 1738546500,
- 'upload_date': '20250203',
- '_old_archive_ids': ['zdf 240319_2310_sendung_not'],
- },
- 'params': {'skip_download': 'geo-restricted http format'},
}, {
# Same as https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html
'url': 'https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html',
@@ -617,7 +641,7 @@ class ZDFChannelIE(ZDFBaseIE):
'title': 'Sylt - Deutschlands edles Nordlicht',
'description': 'md5:35407b810c2e1e33efbe15ef6e4c06c3',
'duration': 810.0,
- 'thumbnail': 'https://www.zdf.de/assets/sylt-118~original?cb=1613992485011',
+ 'thumbnail': r're:https://www\.zdf\.de/assets/sylt-118~original\?cb=\d+',
'series': 'Sylt - Deutschlands edles Nordlicht',
'series_id': 'sylt---deutschlands-edles-nordlicht-movie-100',
'timestamp': 1612462500,
@@ -634,38 +658,24 @@ class ZDFChannelIE(ZDFBaseIE):
},
'playlist_mincount': 2,
}, {
- 'url': 'https://www.zdf.de/serien/taunuskrimi/',
- 'info_dict': {
- 'id': 'taunuskrimi-100',
- 'title': 'Taunuskrimi',
- 'description': 'md5:ee7204e9c625c3b611d1274f9d0e3070',
- },
- 'playlist_mincount': 8,
- }, {
- 'url': 'https://www.zdf.de/serien/taunuskrimi/?staffel=1',
- 'info_dict': {
- 'id': 'taunuskrimi-100-s1',
- 'title': 'Taunuskrimi - Season 1',
- 'description': 'md5:ee7204e9c625c3b611d1274f9d0e3070',
- },
- 'playlist_count': 7,
}, {
+ # All seasons of playlist
'url': 'https://www.zdf.de/magazine/heute-journal-104',
'info_dict': {
'id': 'heute-journal-104',
'title': 'heute journal',
'description': 'md5:6edad39189abf8431795d3d6d7f986b3',
},
- 'playlist_mincount': 500,
+ 'playlist_mincount': 366,
}, {
- 'url': 'https://www.zdf.de/magazine/heute-journal-104?staffel=2024',
+ # Only selected season
+ 'url': 'https://www.zdf.de/magazine/heute-journal-104?staffel=2025',
'info_dict': {
- 'id': 'heute-journal-104-s2024',
- 'title': 'heute journal - Season 2024',
+ 'id': 'heute-journal-104-s2025',
+ 'title': 'heute journal - Season 2025',
'description': 'md5:6edad39189abf8431795d3d6d7f986b3',
},
- 'playlist_count': 242,
- 'skip': 'Video count changes daily, needs support for playlist_maxcount',
+ 'playlist_mincount': 1,
+ 'playlist_maxcount': 365,
}]
_PAGE_SIZE = 24

yt_dlp/extractor/zoom.py View File

@@ -4,13 +4,15 @@ from ..utils import (
int_or_none,
js_to_json,
parse_filesize,
+ parse_qs,
parse_resolution,
str_or_none,
- traverse_obj,
+ update_url_query,
url_basename,
urlencode_postdata,
urljoin,
)
+ from ..utils.traversal import traverse_obj
class ZoomIE(InfoExtractor):
@@ -87,6 +89,7 @@ class ZoomIE(InfoExtractor):
def _real_extract(self, url):
base_url, url_type, video_id = self._match_valid_url(url).group('base_url', 'type', 'id')
query = {}
+ start_params = traverse_obj(url, {'startTime': ({parse_qs}, 'startTime', -1)})
if url_type == 'share':
webpage = self._get_real_webpage(url, base_url, video_id, 'share')
@@ -94,7 +97,7 @@ class ZoomIE(InfoExtractor):
redirect_path = self._download_json(
f'{base_url}nws/recording/1.0/play/share-info/{meeting_id}',
video_id, note='Downloading share info JSON')['result']['redirectUrl']
- url = urljoin(base_url, redirect_path)
+ url = update_url_query(urljoin(base_url, redirect_path), start_params)
query['continueMode'] = 'true'
webpage = self._get_real_webpage(url, base_url, video_id, 'play')
@@ -103,6 +106,7 @@ class ZoomIE(InfoExtractor):
# When things go wrong, file_id can be empty string
raise ExtractorError('Unable to extract file ID')
+ query.update(start_params)
data = self._download_json(
f'{base_url}nws/recording/1.0/play/info/{file_id}', video_id, query=query,
note='Downloading play info JSON')['result']
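Note: the net effect of the three hunks above is that a startTime query parameter on the incoming URL survives the share-info redirect and is also passed along to the play-info API. A minimal sketch of the same flow; the share URL and redirect path below are hypothetical, while parse_qs, update_url_query and urljoin are the real yt-dlp helpers:

from yt_dlp.utils import parse_qs, update_url_query, urljoin

share_url = 'https://zoom.us/rec/share/abc123?startTime=1700000000000'  # hypothetical
# mirrors the traverse_obj expression above: keep only the last startTime value, if any
qs = parse_qs(share_url)
start_params = {'startTime': qs['startTime'][-1]} if qs.get('startTime') else {}

redirect_path = '/rec/play/xyz789'  # hypothetical redirect from the share-info API
play_url = update_url_query(urljoin('https://zoom.us/', redirect_path), start_params)
assert play_url == 'https://zoom.us/rec/play/xyz789?startTime=1700000000000'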

yt_dlp/options.py View File

@@ -1212,7 +1212,7 @@ def create_parser():
help='Maximum number of seconds to sleep. Can only be used along with --min-sleep-interval')
workarounds.add_option(
'--sleep-subtitles', metavar='SECONDS',
- dest='sleep_interval_subtitles', default=0, type=int,
+ dest='sleep_interval_subtitles', default=0, type=float,
help='Number of seconds to sleep before each subtitle download')
verbosity = optparse.OptionGroup(parser, 'Verbosity and Simulation Options')
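Note: switching the option type from int to float means fractional values such as --sleep-subtitles 0.5 are now accepted instead of being rejected by optparse. The equivalent when embedding yt-dlp (sleep_interval_subtitles is the matching YoutubeDL option key):

import yt_dlp

# sleep half a second before each subtitle download
ydl_opts = {'writesubtitles': True, 'sleep_interval_subtitles': 0.5}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    pass  # call ydl.download([...]) as usual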

yt_dlp/postprocessor/metadataparser.py View File

@@ -42,6 +42,9 @@ class MetadataParserPP(PostProcessor):
to a regex like
'(?P<title>.+)\ \-\ (?P<artist>.+)'
"""
+ if re.fullmatch(r'\w+', fmt):
+     # convert a single field name into a regex pattern that matches the entire input
+     return rf'(?s)(?P<{fmt}>.+)'
if not re.search(r'%\(\w+\)s', fmt):
return fmt
lastpos = 0
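Note: the added fullmatch branch is what makes an interpretation like --parse-metadata 'description:meta_comment' work: a bare field name as the TO pattern is expanded into a regex that captures the entire input into that field. The conversion rule in isolation, as a standalone re-implementation of just this branch with hypothetical input:

import re

def format_to_regex(fmt):
    if re.fullmatch(r'\w+', fmt):
        # a bare field name captures the whole input, newlines included
        return rf'(?s)(?P<{fmt}>.+)'
    return fmt  # remaining template handling elided

pattern = format_to_regex('meta_comment')
m = re.search(pattern, 'First line\nsecond line')
assert m.group('meta_comment') == 'First line\nsecond line'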

yt_dlp/utils/_utils.py View File

@@ -1262,7 +1262,8 @@ def unified_strdate(date_str, day_first=True):
return str(upload_date)
- def unified_timestamp(date_str, day_first=True):
+ @partial_application
+ def unified_timestamp(date_str, day_first=True, tz_offset=0):
if not isinstance(date_str, str):
return None
@@ -1270,7 +1271,8 @@ def unified_timestamp(date_str, day_first=True):
r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?|sun)(day)?', '', date_str))
pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
- timezone, date_str = extract_timezone(date_str)
+ timezone, date_str = extract_timezone(
+     date_str, default=dt.timedelta(hours=tz_offset) if tz_offset else None)
# Remove AM/PM + timezone
date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
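Note: the new tz_offset parameter only supplies a default offset (in hours) for date strings that carry no timezone of their own; an explicit timezone in the string still takes precedence. A usage sketch with a hypothetical date string:

from yt_dlp.utils import unified_timestamp

# the string has no timezone, so it is interpreted as UTC+01:00
assert unified_timestamp('2025-01-01 12:00', tz_offset=1) == 1735729200  # 2025-01-01 11:00 UTC
# without the offset it is read as plain UTC
assert unified_timestamp('2025-01-01 12:00') == 1735732800  # 2025-01-01 12:00 UTC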
@@ -2828,7 +2830,7 @@ def js_to_json(code, vars={}, *, strict=False):
{STRING_RE}|
{COMMENT_RE}|,(?={SKIP_RE}[\]}}])|
void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
- \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{SKIP_RE}:)?|
+ \b(?:0[xX][0-9a-fA-F]+|(?<!\.)0+[0-7]+)(?:{SKIP_RE}:)?|
[0-9]+(?={SKIP_RE}:)|
!+
''', fix_kv, code)
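Note: the added (?<!\.) lookbehind keeps the octal branch from matching a digit run that directly follows a decimal point, which previously could rewrite the fractional part of a number as an octal integer. A quick check through the public helper:

import json

from yt_dlp.utils import js_to_json

# '0123' follows a decimal point here, so it must stay a fraction,
# not be re-parsed as the octal literal 0123 (== 83)
assert json.loads(js_to_json('{a: 1.0123}')) == {'a': 1.0123}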
@@ -3002,6 +3004,8 @@ def mimetype2ext(mt, default=NO_DEFAULT):
'ttaf+xml': 'dfxp',
'ttml+xml': 'ttml',
'x-ms-sami': 'sami',
+ 'x-subrip': 'srt',
+ 'x-srt': 'srt',
# misc
'gzip': 'gz',
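Note: mimetype2ext keys on the MIME subtype, so with the two added entries both common SubRip spellings now resolve to srt:

from yt_dlp.utils import mimetype2ext

assert mimetype2ext('application/x-subrip') == 'srt'
assert mimetype2ext('text/x-srt') == 'srt'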
@@ -4474,7 +4478,7 @@ def decode_packed_codes(code):
symbol_table[base_n_count] = symbols[count] or base_n_count
return re.sub(
- r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
+ r'\b(\w+)\b', lambda m: symbol_table.get(m.group(0), m.group(0)),
obfuscated_code)
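Note: replacing the direct indexing with dict.get (defaulting to the match itself) means tokens without a symbol-table entry now pass through unchanged instead of raising KeyError and aborting the unpacking. The substitution pattern in isolation, with hypothetical inputs:

import re

symbol_table = {'a': 'alert', 'b': 'document'}
obfuscated_code = 'a(b.title);unknown_token'
result = re.sub(r'\b(\w+)\b', lambda m: symbol_table.get(m.group(0), m.group(0)), obfuscated_code)
assert result == 'alert(document.title);unknown_token'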

yt_dlp/utils/networking.py View File

@@ -17,7 +17,7 @@ from .traversal import traverse_obj
def random_user_agent():
USER_AGENT_TMPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{} Safari/537.36'
# Target versions released within the last ~6 months
- CHROME_MAJOR_VERSION_RANGE = (134, 140)
+ CHROME_MAJOR_VERSION_RANGE = (137, 143)
return USER_AGENT_TMPL.format(f'{random.randint(*CHROME_MAJOR_VERSION_RANGE)}.0.0.0')
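Note: with the bumped range, generated user agents claim a Chrome major version between 137 and 143 inclusive, keeping them within roughly the last six months of releases, e.g.:

import random

USER_AGENT_TMPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{} Safari/537.36'
# each call picks a fresh major version from the supported window
print(USER_AGENT_TMPL.format(f'{random.randint(137, 143)}.0.0.0'))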