mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-01-13 10:21:30 +00:00
Compare commits
55 Commits
2021.08.02
...
2021.08.10
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
418964fa91 | ||
|
|
c196640ff1 | ||
|
|
60c8fc73c6 | ||
|
|
bc8745480e | ||
|
|
ff5e16f2f6 | ||
|
|
be2fc5b212 | ||
|
|
7be9ccff0b | ||
|
|
245d43cacf | ||
|
|
246fb276e0 | ||
|
|
6e6e0d95b3 | ||
|
|
25a3f4f5d6 | ||
|
|
ad3dc496bb | ||
|
|
2831b4686c | ||
|
|
8c0ae192a4 | ||
|
|
e9f4ccd19e | ||
|
|
a38bd1defa | ||
|
|
476febeb3a | ||
|
|
b6a35ad83b | ||
|
|
bfd56b74b9 | ||
|
|
858a65ecc1 | ||
|
|
3b34e38813 | ||
|
|
3448870205 | ||
|
|
b868936cd6 | ||
|
|
c681cb5d93 | ||
|
|
379e44ed3c | ||
|
|
243c57cfe8 | ||
|
|
28f436bad0 | ||
|
|
2b8a2973bd | ||
|
|
b7b04c782e | ||
|
|
6e84b21559 | ||
|
|
575e17a1b9 | ||
|
|
57015a4a3f | ||
|
|
9cc1a3130a | ||
|
|
b51d2ae3ca | ||
|
|
fee5f0c909 | ||
|
|
7bb6434767 | ||
|
|
124bc071ee | ||
|
|
a047eeb6d2 | ||
|
|
77b87f0519 | ||
|
|
678da2f21b | ||
|
|
cc3fa8d39d | ||
|
|
89efdc15dd | ||
|
|
8012d892bd | ||
|
|
9d65e7bd6d | ||
|
|
36576d7c4c | ||
|
|
bb36a55c41 | ||
|
|
3dbb2a9dcb | ||
|
|
9997eee4af | ||
|
|
3e376d183e | ||
|
|
888299e6ca | ||
|
|
c31be5b009 | ||
|
|
e5611e8eda | ||
|
|
8e6cc12c80 | ||
|
|
e980017ac8 | ||
|
|
e9d9efc0f2 |
2
.github/FUNDING.yml
vendored
2
.github/FUNDING.yml
vendored
@@ -10,4 +10,4 @@ liberapay: # Replace with a single Liberapay username
|
||||
issuehunt: # Replace with a single IssueHunt username
|
||||
otechie: # Replace with a single Otechie username
|
||||
|
||||
custom: ['https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md']
|
||||
custom: ['https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators']
|
||||
|
||||
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
@@ -21,7 +21,7 @@ assignees: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.07.24. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.08.02. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in https://github.com/yt-dlp/yt-dlp.
|
||||
- Search the bugtracker for similar issues: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.
|
||||
@@ -29,7 +29,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.07.24**
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.08.02**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar issues including closed ones
|
||||
@@ -44,7 +44,7 @@ Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v <your com
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKc']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] yt-dlp version 2021.07.24
|
||||
[debug] yt-dlp version 2021.08.02
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
||||
@@ -21,7 +21,7 @@ assignees: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.07.24. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.08.02. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://github.com/yt-dlp/yt-dlp. yt-dlp does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
|
||||
- Search the bugtracker for similar site support requests: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.
|
||||
@@ -29,7 +29,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a new site support request
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.07.24**
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.08.02**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that none of provided URLs violate any copyrights
|
||||
- [ ] I've searched the bugtracker for similar site support requests including closed ones
|
||||
|
||||
@@ -21,13 +21,13 @@ assignees: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.07.24. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.08.02. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar site feature requests: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes like this [x] (Don't forget to delete the empty space)
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a site feature request
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.07.24**
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.08.02**
|
||||
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
|
||||
|
||||
|
||||
|
||||
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
@@ -21,7 +21,7 @@ assignees: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.07.24. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.08.02. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in https://github.com/yt-dlp/yt-dlp.
|
||||
- Search the bugtracker for similar issues: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.
|
||||
@@ -30,7 +30,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support issue
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.07.24**
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.08.02**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar bug reports including closed ones
|
||||
@@ -46,7 +46,7 @@ Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v <your com
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKc']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] yt-dlp version 2021.07.24
|
||||
[debug] yt-dlp version 2021.08.02
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
||||
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
@@ -21,13 +21,13 @@ assignees: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.07.24. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.08.02. If it's not, see https://github.com/yt-dlp/yt-dlp on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar feature requests: https://github.com/yt-dlp/yt-dlp. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes like this [x] (Don't forget to delete the empty space)
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a feature request
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.07.24**
|
||||
- [ ] I've verified that I'm running yt-dlp version **2021.08.02**
|
||||
- [ ] I've searched the bugtracker for similar feature requests including closed ones
|
||||
|
||||
|
||||
|
||||
5
.github/workflows/build.yml
vendored
5
.github/workflows/build.yml
vendored
@@ -103,7 +103,8 @@ jobs:
|
||||
- name: Upgrade pip and enable wheel support
|
||||
run: python -m pip install --upgrade pip setuptools wheel
|
||||
- name: Install Requirements
|
||||
run: pip install pyinstaller mutagen pycryptodome websockets
|
||||
# Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
|
||||
run: pip install "https://yt-dlp.github.io/pyinstaller-builds/x86_64/pyinstaller-4.5.1-py3-none-any.whl" mutagen pycryptodome websockets
|
||||
- name: Bump version
|
||||
id: bump_version
|
||||
run: python devscripts/update-version.py
|
||||
@@ -147,7 +148,7 @@ jobs:
|
||||
- name: Upgrade pip and enable wheel support
|
||||
run: python -m pip install --upgrade pip setuptools wheel
|
||||
- name: Install Requirements
|
||||
run: pip install pyinstaller mutagen pycryptodome websockets
|
||||
run: pip install "https://yt-dlp.github.io/pyinstaller-builds/i686/pyinstaller-4.5.1-py3-none-any.whl" mutagen pycryptodome websockets
|
||||
- name: Bump version
|
||||
id: bump_version
|
||||
run: python devscripts/update-version.py
|
||||
|
||||
11
CONTRIBUTORS
11
CONTRIBUTORS
@@ -67,3 +67,14 @@ zerodytrash
|
||||
wesnm
|
||||
pento
|
||||
rigstot
|
||||
dirkf
|
||||
funniray
|
||||
Jessecar96
|
||||
jhwgh1968
|
||||
kikuyan
|
||||
max-te
|
||||
nchilada
|
||||
pgaig
|
||||
PSlava
|
||||
stdedos
|
||||
u-spec-png
|
||||
|
||||
56
Changelog.md
56
Changelog.md
@@ -19,6 +19,60 @@
|
||||
-->
|
||||
|
||||
|
||||
### 2021.08.10
|
||||
|
||||
* Add option `--replace-in-metadata`
|
||||
* Add option `--no-simulate` to not simulate even when `--print` or `--list...` are used - Deprecates `--print-json`
|
||||
* Allow entire infodict to be printed using `%()s` - makes `--dump-json` redundant
|
||||
* Allow multiple `--exec` and `--exec-before-download`
|
||||
* Add regex to `--match-filter`
|
||||
* Add all format filtering operators also to `--match-filter` by [max-te](https://github.com/max-te)
|
||||
* Add compat-option `no-keep-subs`
|
||||
* [adobepass] Add MSO Cablevision by [Jessecar96](https://github.com/Jessecar96)
|
||||
* [BandCamp] Add BandcampMusicIE by [Ashish0804](https://github.com/Ashish0804)
|
||||
* [blackboardcollaborate] Add new extractor by [Ashish0804](https://github.com/Ashish0804)
|
||||
* [eroprofile] Add album downloader by [jhwgh1968](https://github.com/jhwgh1968)
|
||||
* [mirrativ] Add extractors by [nao20010128nao](https://github.com/nao20010128nao)
|
||||
* [openrec] Add extractors by [nao20010128nao](https://github.com/nao20010128nao)
|
||||
* [nbcolympics:stream] Fix extractor by [nchilada](https://github.com/nchilada), [pukkandan](https://github.com/pukkandan)
|
||||
* [nbcolympics] Update extractor for 2020 olympics by [wesnm](https://github.com/wesnm)
|
||||
* [paramountplus] Separate extractor and fix some titles by [shirt](https://github.com/shirt-dev), [pukkandan](https://github.com/pukkandan)
|
||||
* [RCTIPlus] Support events and TV by [MinePlayersPE](https://github.com/MinePlayersPE)
|
||||
* [Newgrounds] Improve extractor and fix playlist by [u-spec-png](https://github.com/u-spec-png)
|
||||
* [aenetworks] Update `_THEPLATFORM_KEY` and `_THEPLATFORM_SECRET` by [wesnm](https://github.com/wesnm)
|
||||
* [crunchyroll] Fix thumbnail by [funniray](https://github.com/funniray)
|
||||
* [HotStar] Use API for metadata and extract subtitles by [Ashish0804](https://github.com/Ashish0804)
|
||||
* [instagram] Fix comments extraction by [u-spec-png](https://github.com/u-spec-png)
|
||||
* [peertube] Fix videos without description by [u-spec-png](https://github.com/u-spec-png)
|
||||
* [twitch:clips] Extract `display_id` by [dirkf](https://github.com/dirkf)
|
||||
* [viki] Print error message from API request
|
||||
* [Vine] Remove invalid formats by [u-spec-png](https://github.com/u-spec-png)
|
||||
* [VrtNU] Fix XSRF token by [pgaig](https://github.com/pgaig)
|
||||
* [vrv] Fix thumbnail extraction by [funniray](https://github.com/funniray)
|
||||
* [youtube] Add extractor-arg `include-live-dash` to show live dash formats
|
||||
* [youtube] Improve signature function detection by [PSlava](https://github.com/PSlava)
|
||||
* [youtube] Raise appropriate error when API pages can't be downloaded
|
||||
* Ensure `_write_ytdl_file` closes file handle on error
|
||||
* Fix `--compat-options filename` by [stdedos](https://github.com/stdedos)
|
||||
* Fix issues with infodict sanitization
|
||||
* Fix resuming when using `--no-part`
|
||||
* Fix wrong extension for intermediate files
|
||||
* Handle `BrokenPipeError` by [kikuyan](https://github.com/kikuyan)
|
||||
* Show libraries present in verbose head
|
||||
* [extractor] Detect `sttp` as subtitles in MPD by [fstirlitz](https://github.com/fstirlitz)
|
||||
* [extractor] Reset non-repeating warnings per video
|
||||
* [ffmpeg] Fix streaming `mp4` to `stdout`
|
||||
* [ffmpeg] Allow `--ffmpeg-location` to be a file with different name
|
||||
* [utils] Fix `InAdvancePagedList.__getitem__`
|
||||
* [utils] Fix `traverse_obj` depth when `is_user_input`
|
||||
* [webvtt] Merge daisy-chained duplicate cues by [fstirlitz](https://github.com/fstirlitz)
|
||||
* [build] Use custom build of `pyinstaller` by [shirt](https://github.com/shirt-dev)
|
||||
* [tests:download] Add batch testing for extractors (`test_YourExtractor_all`)
|
||||
* [docs] Document which fields `--add-metadata` adds to the file
|
||||
* [docs] Fix some mistakes and improve doc
|
||||
* [cleanup] Misc code cleanup
|
||||
|
||||
|
||||
### 2021.08.02
|
||||
|
||||
* Add logo, banner and donate links
|
||||
@@ -317,7 +371,7 @@
|
||||
* Add `html5=1` param to `get_video_info` page requests by [coletdjnz](https://github.com/coletdjnz)
|
||||
* Better message when login required
|
||||
* **Add option `--print`**: to print any field/template
|
||||
* Deprecates: `--get-description`, `--get-duration`, `--get-filename`, `--get-format`, `--get-id`, `--get-thumbnail`, `--get-title`, `--get-url`
|
||||
* Makes redundant: `--get-description`, `--get-duration`, `--get-filename`, `--get-format`, `--get-id`, `--get-thumbnail`, `--get-title`, `--get-url`
|
||||
* Field `additional_urls` to download additional videos from metadata using [`--parse-metadata`](https://github.com/yt-dlp/yt-dlp#modifying-metadata)
|
||||
* Merge youtube-dl: Up to [commit/dfbbe29](https://github.com/ytdl-org/youtube-dl/commit/dfbbe2902fc67f0f93ee47a8077c148055c67a9b)
|
||||
* Write thumbnail of playlist and add `pl_thumbnail` outtmpl key
|
||||
|
||||
@@ -15,6 +15,8 @@ You can also find lists of all [contributors of yt-dlp](CONTRIBUTORS) and [autho
|
||||
|
||||
## [shirt](https://github.com/shirt-dev)
|
||||
|
||||
[](https://ko-fi.com/shirt)
|
||||
|
||||
* Multithreading (`-N`) and aria2c support for fragment downloads
|
||||
* Support for media initialization and discontinuity in HLS
|
||||
* The self-updater (`-U`)
|
||||
|
||||
2
Makefile
2
Makefile
@@ -13,7 +13,7 @@ pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites com
|
||||
.PHONY: all clean install test tar pypi-files completions ot offlinetest codetest supportedsites
|
||||
|
||||
clean-test:
|
||||
rm -rf *.dump *.part* *.ytdl *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png *.frag *.frag.urls *.frag.aria2 test/testdata/player-*.js
|
||||
rm -rf *.dump *.part* *.ytdl *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png *.frag *.frag.urls *.frag.aria2 test/testdata/player-*.js *.opus *.webp *.ttml *.vtt *.jpeg
|
||||
clean-dist:
|
||||
rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS .mailmap
|
||||
clean-cache:
|
||||
|
||||
170
README.md
170
README.md
@@ -1,11 +1,11 @@
|
||||
<div align="center">
|
||||
|
||||
[](#readme)
|
||||
[](#readme)
|
||||
|
||||
[](https://github.com/yt-dlp/yt-dlp/releases/latest)
|
||||
[](https://github.com/yt-dlp/yt-dlp/actions)
|
||||
[](LICENSE)
|
||||
[](Collaborators.md)
|
||||
[](Collaborators.md#collaborators)
|
||||
[](supportedsites.md)
|
||||
[](https://discord.gg/H5MNcFW63r)
|
||||
[](https://yt-dlp.readthedocs.io)
|
||||
@@ -88,9 +88,9 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/
|
||||
|
||||
* **Aria2c with HLS/DASH**: You can use `aria2c` as the external downloader for DASH(mpd) and HLS(m3u8) formats
|
||||
|
||||
* **New extractors**: AnimeLab, Philo MSO, Spectrum MSO, SlingTV MSO, Rcs, Gedi, bitwave.tv, mildom, audius, zee5, mtv.it, wimtv, pluto.tv, niconico users, discoveryplus.in, mediathek, NFHSNetwork, nebula, ukcolumn, whowatch, MxplayerShow, parlview (au), YoutubeWebArchive, fancode, Saitosan, ShemarooMe, telemundo, VootSeries, SonyLIVSeries, HotstarSeries, VidioPremier, VidioLive, RCTIPlus, TBS Live, douyin, pornflip, ParamountPlusSeries, ScienceChannel, Utreon
|
||||
* **New extractors**: AnimeLab, Philo MSO, Spectrum MSO, SlingTV MSO, Cablevision MSO, Rcs, Gedi, bitwave.tv, mildom, audius, zee5, mtv.it, wimtv, pluto.tv, niconico users, discoveryplus.in, mediathek, NFHSNetwork, nebula, ukcolumn, whowatch, MxplayerShow, parlview (au), YoutubeWebArchive, fancode, Saitosan, ShemarooMe, telemundo, VootSeries, SonyLIVSeries, HotstarSeries, VidioPremier, VidioLive, RCTIPlus, TBS Live, douyin, pornflip, ParamountPlusSeries, ScienceChannel, Utreon, OpenRec, BandcampMusic, blackboardcollaborate, eroprofile albums, mirrativ
|
||||
|
||||
* **Fixed/improved extractors**: archive.org, roosterteeth.com, skyit, instagram, itv, SouthparkDe, spreaker, Vlive, akamai, ina, rumble, tennistv, amcnetworks, la7 podcasts, linuxacadamy, nitter, twitcasting, viu, crackle, curiositystream, mediasite, rmcdecouverte, sonyliv, tubi, tenplay, patreon, videa, yahoo, BravoTV, crunchyroll playlist, RTP, viki, Hotstar, vidio, vimeo, mediaset, Mxplayer
|
||||
* **Fixed/improved extractors**: archive.org, roosterteeth.com, skyit, instagram, itv, SouthparkDe, spreaker, Vlive, akamai, ina, rumble, tennistv, amcnetworks, la7 podcasts, linuxacademy, nitter, twitcasting, viu, crackle, curiositystream, mediasite, rmcdecouverte, sonyliv, tubi, tenplay, patreon, videa, yahoo, BravoTV, crunchyroll playlist, RTP, viki, Hotstar, vidio, vimeo, mediaset, Mxplayer, nbcolympics, ParamountPlus, Newgrounds
|
||||
|
||||
* **Subtitle extraction from manifests**: Subtitles can be extracted from streaming media manifests. See [commit/be6202f](https://github.com/yt-dlp/yt-dlp/commit/be6202f12b97858b9d716e608394b51065d0419f) for details
|
||||
|
||||
@@ -98,11 +98,11 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/
|
||||
|
||||
* **Portable Configuration**: Configuration files are automatically loaded from the home and root directories. See [configuration](#configuration) for details
|
||||
|
||||
* **Output template improvements**: Output templates can now have date-time formatting, numeric offsets, object traversal etc. See [output template](#output-template) for details. Even more advanced operations can also be done with the help of `--parse-metadata`
|
||||
* **Output template improvements**: Output templates can now have date-time formatting, numeric offsets, object traversal etc. See [output template](#output-template) for details. Even more advanced operations can also be done with the help of `--parse-metadata` and `--replace-in-metadata`
|
||||
|
||||
* **Other new options**: `--sleep-requests`, `--convert-thumbnails`, `--write-link`, `--force-download-archive`, `--force-overwrites`, `--break-on-reject` etc
|
||||
* **Other new options**: `--print`, `--sleep-requests`, `--convert-thumbnails`, `--write-link`, `--force-download-archive`, `--force-overwrites`, `--break-on-reject` etc
|
||||
|
||||
* **Improvements**: Multiple `--postprocessor-args` and `--downloader-args`, faster archive checking, more [format selection options](#format-selection) etc
|
||||
* **Improvements**: Regex and other operators in `--match-filter`, multiple `--postprocessor-args` and `--downloader-args`, faster archive checking, more [format selection options](#format-selection) etc
|
||||
|
||||
* **Plugin extractors**: Extractors can be loaded from an external file. See [plugins](#plugins) for details
|
||||
|
||||
@@ -138,6 +138,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu
|
||||
* If `ffmpeg` is used as the downloader, the downloading and merging of formats happen in a single step when possible. Use `--compat-options no-direct-merge` to revert this
|
||||
* Thumbnail embedding in `mp4` is done with mutagen if possible. Use `--compat-options embed-thumbnail-atomicparsley` to force the use of AtomicParsley instead
|
||||
* Some private fields such as filenames are removed by default from the infojson. Use `--no-clean-infojson` or `--compat-options no-clean-infojson` to revert this
|
||||
* When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the separate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. `--compat-options no-keep-subs` can be used to revert this.
|
||||
|
||||
For ease of use, a few more compat options are available:
|
||||
* `--compat-options all`: Use all compat options
|
||||
@@ -239,10 +240,10 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
(default) (Alias: --no-abort-on-error)
|
||||
--abort-on-error Abort downloading of further videos if an
|
||||
error occurs (Alias: --no-ignore-errors)
|
||||
--dump-user-agent Display the current browser identification
|
||||
--list-extractors List all supported extractors
|
||||
--dump-user-agent Display the current user-agent and exit
|
||||
--list-extractors List all supported extractors and exit
|
||||
--extractor-descriptions Output descriptions of all supported
|
||||
extractors
|
||||
extractors and exit
|
||||
--force-generic-extractor Force extraction to use the generic
|
||||
extractor
|
||||
--default-search PREFIX Use this prefix for unqualified URLs. For
|
||||
@@ -338,25 +339,24 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
COUNT views
|
||||
--max-views COUNT Do not download any videos with more than
|
||||
COUNT views
|
||||
--match-filter FILTER Generic video filter. Specify any key (see
|
||||
"OUTPUT TEMPLATE" for a list of available
|
||||
keys) to match if the key is present, !key
|
||||
to check if the key is not present,
|
||||
key>NUMBER (like "view_count > 12", also
|
||||
works with >=, <, <=, !=, =) to compare
|
||||
against a number, key = 'LITERAL' (like
|
||||
"uploader = 'Mike Smith'", also works with
|
||||
!=) to match against a string literal and &
|
||||
to require multiple matches. Values which
|
||||
are not known are excluded unless you put a
|
||||
question mark (?) after the operator. For
|
||||
example, to only match videos that have
|
||||
been liked more than 100 times and disliked
|
||||
less than 50 times (or the dislike
|
||||
functionality is not available at the given
|
||||
service), but who also have a description,
|
||||
use --match-filter "like_count > 100 &
|
||||
dislike_count <? 50 & description"
|
||||
--match-filter FILTER Generic video filter. Any field (see
|
||||
"OUTPUT TEMPLATE") can be compared with a
|
||||
number or a string using the operators
|
||||
defined in "Filtering formats". You can
|
||||
also simply specify a field to match if the
|
||||
field is present and "!field" to check if
|
||||
the field is not present. In addition,
|
||||
Python style regular expression matching
|
||||
can be done using "~=", and multiple
|
||||
filters can be checked with "&". Use a "\"
|
||||
to escape "&" or quotes if needed. Eg:
|
||||
--match-filter "!is_live & like_count>?100
|
||||
& description~=\'(?i)\bcats \& dogs\b\'"
|
||||
matches only videos that are not live, has
|
||||
a like count more than 100 (or the like
|
||||
field is not available), and also has a
|
||||
description that contains the phrase "cats
|
||||
& dogs" (ignoring case)
|
||||
--no-match-filter Do not use generic video filter (default)
|
||||
--no-playlist Download only the video, if the URL refers
|
||||
to a video and a playlist
|
||||
@@ -551,8 +551,8 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
--no-write-thumbnail Do not write thumbnail image to disk
|
||||
(default)
|
||||
--write-all-thumbnails Write all thumbnail image formats to disk
|
||||
--list-thumbnails Simulate and list all available thumbnail
|
||||
formats
|
||||
--list-thumbnails List available thumbnails of each video.
|
||||
Simulate unless --no-simulate is used
|
||||
|
||||
## Internet Shortcut Options:
|
||||
--write-link Write an internet shortcut file, depending
|
||||
@@ -564,30 +564,34 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
--write-desktop-link Write a .desktop Linux internet shortcut
|
||||
|
||||
## Verbosity and Simulation Options:
|
||||
-q, --quiet Activate quiet mode
|
||||
-q, --quiet Activate quiet mode. If used with
|
||||
--verbose, print the log to stderr
|
||||
--no-warnings Ignore warnings
|
||||
-s, --simulate Do not download the video and do not write
|
||||
anything to disk
|
||||
--no-simulate Download the video even if printing/listing
|
||||
options are used
|
||||
--ignore-no-formats-error Ignore "No video formats" error. Useful
|
||||
for extracting metadata even if the video
|
||||
is not actually available for download
|
||||
for extracting metadata even if the videos
|
||||
are not actually available for download
|
||||
(experimental)
|
||||
--no-ignore-no-formats-error Throw error when no downloadable video
|
||||
formats are found (default)
|
||||
--skip-download Do not download the video but write all
|
||||
related files (Alias: --no-download)
|
||||
-O, --print TEMPLATE Simulate, quiet but print the given fields.
|
||||
Either a field name or similar formatting
|
||||
as the output template can be used
|
||||
-j, --dump-json Simulate, quiet but print JSON information.
|
||||
See "OUTPUT TEMPLATE" for a description of
|
||||
available keys
|
||||
-J, --dump-single-json Simulate, quiet but print JSON information
|
||||
for each command-line argument. If the URL
|
||||
refers to a playlist, dump the whole
|
||||
playlist information in a single line
|
||||
--print-json Be quiet and print the video information as
|
||||
JSON (video is still being downloaded)
|
||||
-O, --print TEMPLATE Quiet, but print the given fields for each
|
||||
video. Simulate unless --no-simulate is
|
||||
used. Either a field name or same syntax as
|
||||
the output template can be used
|
||||
-j, --dump-json Quiet, but print JSON information for each
|
||||
video. Simulate unless --no-simulate is
|
||||
used. See "OUTPUT TEMPLATE" for a
|
||||
description of available keys
|
||||
-J, --dump-single-json Quiet, but print JSON information for each
|
||||
url or infojson passed. Simulate unless
|
||||
--no-simulate is used. If the URL refers to
|
||||
a playlist, the whole playlist information
|
||||
is dumped in a single line
|
||||
--force-write-archive Force download archive entries to be
|
||||
written as far as no errors occur, even if
|
||||
-s or another simulation option is used
|
||||
@@ -658,8 +662,8 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
actually downloadable
|
||||
--no-check-formats Do not check that the formats selected are
|
||||
actually downloadable
|
||||
-F, --list-formats List all available formats of requested
|
||||
videos
|
||||
-F, --list-formats List available formats of each video.
|
||||
Simulate unless --no-simulate is used
|
||||
--merge-output-format FORMAT If a merge is required (e.g.
|
||||
bestvideo+bestaudio), output to given
|
||||
container format. One of mkv, mp4, ogg,
|
||||
@@ -677,7 +681,8 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
(Alias: --write-automatic-subs)
|
||||
--no-write-auto-subs Do not write auto-generated subtitles
|
||||
(default) (Alias: --no-write-automatic-subs)
|
||||
--list-subs List all available subtitles for the video
|
||||
--list-subs List available subtitles of each video.
|
||||
Simulate unless --no-simulate is used
|
||||
--sub-format FORMAT Subtitle format, accepts formats
|
||||
preference, for example: "srt" or
|
||||
"ass/srt/best"
|
||||
@@ -712,7 +717,7 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
--audio-format FORMAT Specify audio format to convert the audio
|
||||
to when -x is used. Currently supported
|
||||
formats are: best (default) or one of
|
||||
aac|flac|mp3|m4a|opus|vorbis|wav
|
||||
best|aac|flac|mp3|m4a|opus|vorbis|wav
|
||||
--audio-quality QUALITY Specify ffmpeg audio quality, insert a
|
||||
value between 0 (better) and 9 (worse) for
|
||||
VBR or a specific bitrate like 128K
|
||||
@@ -772,6 +777,10 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
--parse-metadata FROM:TO Parse additional metadata like title/artist
|
||||
from other fields; see "MODIFYING METADATA"
|
||||
for details
|
||||
--replace-in-metadata FIELDS REGEX REPLACE
|
||||
Replace text in a metadata field using the
|
||||
given regex. This option can be used
|
||||
multiple times
|
||||
--xattrs Write metadata to the video file's xattrs
|
||||
(using dublin core and xdg standards)
|
||||
--fixup POLICY Automatically correct known faults of the
|
||||
@@ -784,17 +793,22 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||
path to the binary or its containing
|
||||
directory
|
||||
--exec CMD Execute a command on the file after
|
||||
downloading and post-processing. Similar
|
||||
syntax to the output template can be used
|
||||
downloading and post-processing. Same
|
||||
syntax as the output template can be used
|
||||
to pass any field as arguments to the
|
||||
command. An additional field "filepath"
|
||||
that contains the final path of the
|
||||
downloaded file is also available. If no
|
||||
fields are passed, %(filepath)q is appended
|
||||
to the end of the command
|
||||
to the end of the command. This option can
|
||||
be used multiple times
|
||||
--no-exec Remove any previously defined --exec
|
||||
--exec-before-download CMD Execute a command before the actual
|
||||
download. The syntax is the same as --exec
|
||||
but "filepath" is not available
|
||||
but "filepath" is not available. This
|
||||
option can be used multiple times
|
||||
--no-exec-before-download Remove any previously defined
|
||||
--exec-before-download
|
||||
--convert-subs FORMAT Convert the subtitles to another format
|
||||
(currently supported: srt|vtt|ass|lrc)
|
||||
(Alias: --convert-subtitles)
|
||||
@@ -919,7 +933,7 @@ The simplest usage of `-o` is not to set any template arguments when downloading
|
||||
It may however also contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations.
|
||||
|
||||
The field names themselves (the part inside the parenthesis) can also have some special formatting:
|
||||
1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. You can also do python slicing using `:`. Eg: `%(tags.0)s`, `%(subtitles.en.-1.ext)`, `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. Note that all the fields that become available using this method are not listed below. Use `-j` to see such fields
|
||||
1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. You can also do python slicing using `:`. Eg: `%(tags.0)s`, `%(subtitles.en.-1.ext)s`, `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. `%()s` refers to the entire infodict. Note that all the fields that become available using this method are not listed below. Use `-j` to see such fields
|
||||
1. **Addition**: Addition and subtraction of numeric fields can be done using `+` and `-` respectively. Eg: `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d`
|
||||
1. **Date/time Formatting**: Date/time fields can be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it separated from the field name using a `>`. Eg: `%(duration>%H-%M-%S)s`, `%(upload_date>%Y-%m-%d)s`, `%(epoch-3600>%H-%M-%S)s`
|
||||
1. **Default**: A default value can be specified for when the field is empty using a `|` separator. This overrides `--output-na-template`. Eg: `%(uploader|Unknown)s`
|
||||
@@ -960,7 +974,7 @@ The available fields are:
|
||||
- `average_rating` (numeric): Average rating given by users, the scale used depends on the webpage
|
||||
- `comment_count` (numeric): Number of comments on the video (For some extractors, comments are only downloaded at the end, and so this field cannot be used)
|
||||
- `age_limit` (numeric): Age restriction for the video (years)
|
||||
- `live_status` (string): One of 'is_live', 'was_live', 'upcoming', 'not_live'
|
||||
- `live_status` (string): One of 'is_live', 'was_live', 'is_upcoming', 'not_live'
|
||||
- `is_live` (boolean): Whether this video is a live stream or a fixed-length video
|
||||
- `was_live` (boolean): Whether this video was originally a live stream
|
||||
- `playable_in_embed` (string): Whether this video is allowed to play in embedded players on other sites
|
||||
@@ -1323,13 +1337,39 @@ $ yt-dlp -S '+res:480,codec,br'
|
||||
|
||||
# MODIFYING METADATA
|
||||
|
||||
The metadata obtained by the extractors can be modified by using `--parse-metadata FROM:TO`. The general syntax is to give the name of a field or a template (with similar syntax to [output template](#output-template)) to extract data from, and the format to interpret it as, separated by a colon `:`. Either a [python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax) with named capture groups or a similar syntax to the [output template](#output-template) (only `%(field)s` formatting is supported) can be used for `TO`. The option can be used multiple times to parse and modify various fields.
|
||||
The metadata obtained by the extractors can be modified by using `--parse-metadata` and `--replace-in-metadata`
|
||||
|
||||
`--replace-in-metadata FIELDS REGEX REPLACE` is used to replace text in any metadata field using [python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax). [Backreferences](https://docs.python.org/3/library/re.html?highlight=backreferences#re.sub) can be used in the replace string for advanced use.
|
||||
|
||||
The general syntax of `--parse-metadata FROM:TO` is to give the name of a field or a template (with same syntax as [output template](#output-template)) to extract data from, and the format to interpret it as, separated by a colon `:`. Either a [python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax) with named capture groups or a similar syntax to the [output template](#output-template) (only `%(field)s` formatting is supported) can be used for `TO`. The option can be used multiple times to parse and modify various fields.
|
||||
|
||||
Note that any field created by this can be used in the [output template](#output-template) and will also affect the media file's metadata added when using `--add-metadata`.
|
||||
|
||||
This option also has a few special uses:
|
||||
* You can use this to change the metadata that is embedded in the media file. To do this, set the value of the corresponding field with a `meta_` prefix. For example, any value you set to `meta_description` field will be added to the `description` field in the file. You can use this to set a different "description" and "synopsis", for example
|
||||
* You can download an additional URL based on the metadata of the currently downloaded video. To do this, set the field `additional_urls` to the URL that you want to download. Eg: `--parse-metadata "description:(?P<additional_urls>https?://www\.vimeo\.com/\d+)"` will download the first vimeo video found in the description
|
||||
* You can use this to change the metadata that is embedded in the media file. To do this, set the value of the corresponding field with a `meta_` prefix. For example, any value you set to `meta_description` field will be added to the `description` field in the file. For example, you can use this to set a different "description" and "synopsis"
|
||||
|
||||
For reference, these are the fields yt-dlp adds by default to the file metadata:
|
||||
|
||||
Metadata fields|From
|
||||
:---|:---
|
||||
`title`|`track` or `title`
|
||||
`date`|`upload_date`
|
||||
`description`, `synopsis`|`description`
|
||||
`purl`, `comment`|`webpage_url`
|
||||
`track`|`track_number`
|
||||
`artist`|`artist`, `creator`, `uploader` or `uploader_id`
|
||||
`genre`|`genre`
|
||||
`album`|`album`
|
||||
`album_artist`|`album_artist`
|
||||
`disc`|`disc_number`
|
||||
`show`|`series`
|
||||
`season_number`|`season_number`
|
||||
`episode_id`|`episode` or `episode_id`
|
||||
`episode_sort`|`episode_number`
|
||||
`language` of each stream|From the format's `language`
|
||||
**Note**: The file format may not support some of these fields
|
||||
|
||||
|
||||
## Modifying metadata examples
|
||||
|
||||
@@ -1348,20 +1388,24 @@ $ yt-dlp --parse-metadata '%(series)s S%(season_number)02dE%(episode_number)02d:
|
||||
# Set "comment" field in video metadata using description instead of webpage_url
|
||||
$ yt-dlp --parse-metadata 'description:(?s)(?P<meta_comment>.+)' --add-metadata
|
||||
|
||||
# Replace all spaces and "_" in title and uploader with a `-`
|
||||
$ yt-dlp --replace-in-metadata 'title,uploader' '[ _]' '-'
|
||||
|
||||
```
|
||||
|
||||
# EXTRACTOR ARGUMENTS
|
||||
|
||||
Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. Eg: `--extractor-args "youtube:skip=dash,hls;player_client=android" --extractor-args "funimation:version=uncut"`
|
||||
Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. Eg: `--extractor-args "youtube:player_client=android_agegate,web;include_live_dash" --extractor-args "funimation:version=uncut"`
|
||||
|
||||
The following extractors use this feature:
|
||||
* **youtube**
|
||||
* `skip`: `hls` or `dash` (or both) to skip download of the respective manifests
|
||||
* `player_client`: Clients to extract video data from. The main clients are `web`, `android`, `ios`, `mweb`. These also have `_music`, `_embedded`, `_agegate`, and `_creator` variants (Eg: `web_embedded`) (`mweb` has only `_agegate`). By default, `android,web` is used, but the agegate and creator variants are added as required for age-gated videos. Similarly the music variants are added for `music.youtube.com` urls. You can also use `all` to use all the clients
|
||||
* `player_skip`: `configs` - skip any requests for client configs and use defaults
|
||||
* `include_live_dash`: Include live dash formats (These formats don't download properly)
|
||||
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side).
|
||||
* `max_comments`: maximum amount of comments to download (default all).
|
||||
* `max_comment_depth`: maximum depth for nested comments. YouTube supports depths 1 or 2 (default).
|
||||
* `max_comments`: Maximum amount of comments to download (default all).
|
||||
* `max_comment_depth`: Maximum depth for nested comments. YouTube supports depths 1 or 2 (default).
|
||||
|
||||
* **funimation**
|
||||
* `language`: Languages to extract. Eg: `funimation:language=english,japanese`
|
||||
@@ -1394,6 +1438,7 @@ While these options are redundant, they are still expected to be used due to the
|
||||
--get-thumbnail --print thumbnail
|
||||
-e, --get-title --print title
|
||||
-g, --get-url --print urls
|
||||
-j, --dump-json --print "%()j"
|
||||
|
||||
|
||||
#### Not recommended
|
||||
@@ -1401,6 +1446,7 @@ While these options still work, their use is not recommended since there are oth
|
||||
|
||||
--all-formats -f all
|
||||
--all-subs --sub-langs all --write-subs
|
||||
--print-json -j --no-simulate
|
||||
--autonumber-size NUMBER Use string formatting. Eg: %(autonumber)03d
|
||||
--autonumber-start NUMBER Use internal field formatting like %(autonumber+NUMBER)s
|
||||
--metadata-from-title FORMAT --parse-metadata "%(title)s:FORMAT"
|
||||
@@ -1418,7 +1464,7 @@ While these options still work, their use is not recommended since there are oth
|
||||
#### Developer options
|
||||
These options are not intended to be used by the end-user
|
||||
|
||||
--test For testing extractors
|
||||
--test Download only part of video for testing extractors
|
||||
--youtube-print-sig-code For testing youtube signatures
|
||||
|
||||
|
||||
|
||||
@@ -11,5 +11,4 @@ else
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo python3 -m pytest -k $test_set
|
||||
python3 -m pytest -k "$test_set"
|
||||
|
||||
@@ -95,6 +95,7 @@
|
||||
- **Bandcamp**
|
||||
- **Bandcamp:album**
|
||||
- **Bandcamp:weekly**
|
||||
- **BandcampMusic**
|
||||
- **bangumi.bilibili.com**: BiliBili番剧
|
||||
- **bbc**: BBC
|
||||
- **bbc.co.uk**: BBC iPlayer
|
||||
@@ -129,6 +130,7 @@
|
||||
- **BitChuteChannel**
|
||||
- **bitwave:replay**
|
||||
- **bitwave:stream**
|
||||
- **BlackboardCollaborate**
|
||||
- **BleacherReport**
|
||||
- **BleacherReportCMS**
|
||||
- **Bloomberg**
|
||||
@@ -295,6 +297,7 @@
|
||||
- **Engadget**
|
||||
- **Eporner**
|
||||
- **EroProfile**
|
||||
- **EroProfile:album**
|
||||
- **Escapist**
|
||||
- **ESPN**
|
||||
- **ESPNArticle**
|
||||
@@ -552,6 +555,8 @@
|
||||
- **MinistryGrid**
|
||||
- **Minoto**
|
||||
- **miomio.tv**
|
||||
- **mirrativ**
|
||||
- **mirrativ:user**
|
||||
- **MiTele**: mitele.es
|
||||
- **mixcloud**
|
||||
- **mixcloud:playlist**
|
||||
@@ -703,6 +708,8 @@
|
||||
- **OnionStudios**
|
||||
- **Ooyala**
|
||||
- **OoyalaExternal**
|
||||
- **openrec**
|
||||
- **openrec:capture**
|
||||
- **OraTV**
|
||||
- **orf:burgenland**: Radio Burgenland
|
||||
- **orf:fm4**: radio FM4
|
||||
@@ -728,6 +735,7 @@
|
||||
- **PalcoMP3:video**
|
||||
- **pandora.tv**: 판도라TV
|
||||
- **ParamountNetwork**
|
||||
- **ParamountPlus**
|
||||
- **ParamountPlusSeries**
|
||||
- **parliamentlive.tv**: UK parliament videos
|
||||
- **Parlview**
|
||||
@@ -816,6 +824,7 @@
|
||||
- **RCSVarious**
|
||||
- **RCTIPlus**
|
||||
- **RCTIPlusSeries**
|
||||
- **RCTIPlusTV**
|
||||
- **RDS**: RDS.ca
|
||||
- **RedBull**
|
||||
- **RedBullEmbed**
|
||||
|
||||
@@ -18,7 +18,7 @@ from yt_dlp.compat import compat_os_name, compat_setenv, compat_str, compat_urll
|
||||
from yt_dlp.extractor import YoutubeIE
|
||||
from yt_dlp.extractor.common import InfoExtractor
|
||||
from yt_dlp.postprocessor.common import PostProcessor
|
||||
from yt_dlp.utils import ExtractorError, int_or_none, match_filter_func
|
||||
from yt_dlp.utils import ExtractorError, int_or_none, match_filter_func, LazyList
|
||||
|
||||
TEST_URL = 'http://localhost/sample.mp4'
|
||||
|
||||
@@ -668,20 +668,25 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
out = ydl.escape_outtmpl(outtmpl) % tmpl_dict
|
||||
fname = ydl.prepare_filename(info or self.outtmpl_info)
|
||||
|
||||
if callable(expected):
|
||||
self.assertTrue(expected(out))
|
||||
self.assertTrue(expected(fname))
|
||||
elif isinstance(expected, str):
|
||||
self.assertEqual(out, expected)
|
||||
self.assertEqual(fname, expected)
|
||||
else:
|
||||
self.assertEqual(out, expected[0])
|
||||
self.assertEqual(fname, expected[1])
|
||||
if not isinstance(expected, (list, tuple)):
|
||||
expected = (expected, expected)
|
||||
for (name, got), expect in zip((('outtmpl', out), ('filename', fname)), expected):
|
||||
if callable(expect):
|
||||
self.assertTrue(expect(got), f'Wrong {name} from {tmpl}')
|
||||
else:
|
||||
self.assertEqual(got, expect, f'Wrong {name} from {tmpl}')
|
||||
|
||||
# Side-effects
|
||||
original_infodict = dict(self.outtmpl_info)
|
||||
test('foo.bar', 'foo.bar')
|
||||
original_infodict['epoch'] = self.outtmpl_info.get('epoch')
|
||||
self.assertTrue(isinstance(original_infodict['epoch'], int))
|
||||
test('%(epoch)d', int_or_none)
|
||||
self.assertEqual(original_infodict, self.outtmpl_info)
|
||||
|
||||
# Auto-generated fields
|
||||
test('%(id)s.%(ext)s', '1234.mp4')
|
||||
test('%(duration_string)s', ('27:46:40', '27-46-40'))
|
||||
test('%(epoch)d', int_or_none)
|
||||
test('%(resolution)s', '1080p')
|
||||
test('%(playlist_index)s', '001')
|
||||
test('%(autonumber)s', '00001')
|
||||
@@ -714,7 +719,16 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
# Invalid templates
|
||||
self.assertTrue(isinstance(YoutubeDL.validate_outtmpl('%(title)'), ValueError))
|
||||
test('%(invalid@tmpl|def)s', 'none', outtmpl_na_placeholder='none')
|
||||
test('%()s', 'NA')
|
||||
test('%(..)s', 'NA')
|
||||
|
||||
# Entire info_dict
|
||||
def expect_same_infodict(out):
|
||||
got_dict = json.loads(out)
|
||||
for info_field, expected in self.outtmpl_info.items():
|
||||
self.assertEqual(got_dict.get(info_field), expected, info_field)
|
||||
return True
|
||||
|
||||
test('%()j', (expect_same_infodict, str))
|
||||
|
||||
# NA placeholder
|
||||
NA_TEST_OUTTMPL = '%(uploader_date)s-%(width)d-%(x|def)s-%(id)s.%(ext)s'
|
||||
@@ -774,6 +788,12 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
test('%(formats.0.id.-1+id)f', '1235.000000')
|
||||
test('%(formats.0.id.-1+formats.1.id.-1)d', '3')
|
||||
|
||||
# Laziness
|
||||
def gen():
|
||||
yield from range(5)
|
||||
raise self.assertTrue(False, 'LazyList should not be evaluated till here')
|
||||
test('%(key.4)s', '4', info={'key': LazyList(gen())})
|
||||
|
||||
# Empty filename
|
||||
test('%(foo|)s-%(bar|)s.%(ext)s', '-.mp4')
|
||||
# test('%(foo|)s.%(ext)s', ('.mp4', '_.mp4')) # fixme
|
||||
|
||||
@@ -73,6 +73,8 @@ class TestDownload(unittest.TestCase):
|
||||
|
||||
maxDiff = None
|
||||
|
||||
COMPLETED_TESTS = {}
|
||||
|
||||
def __str__(self):
|
||||
"""Identify each test with the `add_ie` attribute, if available."""
|
||||
|
||||
@@ -94,6 +96,9 @@ class TestDownload(unittest.TestCase):
|
||||
def generator(test_case, tname):
|
||||
|
||||
def test_template(self):
|
||||
if self.COMPLETED_TESTS.get(tname):
|
||||
return
|
||||
self.COMPLETED_TESTS[tname] = True
|
||||
ie = yt_dlp.extractor.get_info_extractor(test_case['name'])()
|
||||
other_ies = [get_info_extractor(ie_key)() for ie_key in test_case.get('add_ie', [])]
|
||||
is_playlist = any(k.startswith('playlist') for k in test_case)
|
||||
@@ -142,7 +147,7 @@ def generator(test_case, tname):
|
||||
expect_warnings(ydl, test_case.get('expected_warnings', []))
|
||||
|
||||
def get_tc_filename(tc):
|
||||
return ydl.prepare_filename(tc.get('info_dict', {}))
|
||||
return ydl.prepare_filename(dict(tc.get('info_dict', {})))
|
||||
|
||||
res_dict = None
|
||||
|
||||
@@ -255,12 +260,12 @@ def generator(test_case, tname):
|
||||
|
||||
|
||||
# And add them to TestDownload
|
||||
for n, test_case in enumerate(defs):
|
||||
tname = 'test_' + str(test_case['name'])
|
||||
i = 1
|
||||
while hasattr(TestDownload, tname):
|
||||
tname = 'test_%s_%d' % (test_case['name'], i)
|
||||
i += 1
|
||||
tests_counter = {}
|
||||
for test_case in defs:
|
||||
name = test_case['name']
|
||||
i = tests_counter.get(name, 0)
|
||||
tests_counter[name] = i + 1
|
||||
tname = f'test_{name}_{i}' if i else f'test_{name}'
|
||||
test_method = generator(test_case, tname)
|
||||
test_method.__name__ = str(tname)
|
||||
ie_list = test_case.get('add_ie')
|
||||
@@ -269,5 +274,22 @@ for n, test_case in enumerate(defs):
|
||||
del test_method
|
||||
|
||||
|
||||
def batch_generator(name, num_tests):
|
||||
|
||||
def test_template(self):
|
||||
for i in range(num_tests):
|
||||
getattr(self, f'test_{name}_{i}' if i else f'test_{name}')()
|
||||
|
||||
return test_template
|
||||
|
||||
|
||||
for name, num_tests in tests_counter.items():
|
||||
test_method = batch_generator(name, num_tests)
|
||||
test_method.__name__ = f'test_{name}_all'
|
||||
test_method.add_ie = ''
|
||||
setattr(TestDownload, test_method.__name__, test_method)
|
||||
del test_method
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
@@ -8,13 +8,14 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import try_rm
|
||||
from test.helper import is_download_test, try_rm
|
||||
|
||||
|
||||
root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
download_file = join(root_dir, 'test.webm')
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestOverwrites(unittest.TestCase):
|
||||
def setUp(self):
|
||||
# create an empty file
|
||||
|
||||
@@ -11,32 +11,31 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from yt_dlp import YoutubeDL
|
||||
from yt_dlp.compat import compat_shlex_quote
|
||||
from yt_dlp.postprocessor import (
|
||||
ExecAfterDownloadPP,
|
||||
ExecPP,
|
||||
FFmpegThumbnailsConvertorPP,
|
||||
MetadataFromFieldPP,
|
||||
MetadataFromTitlePP,
|
||||
MetadataParserPP,
|
||||
)
|
||||
|
||||
|
||||
class TestMetadataFromField(unittest.TestCase):
|
||||
|
||||
def test_format_to_regex(self):
|
||||
pp = MetadataFromFieldPP(None, ['title:%(title)s - %(artist)s'])
|
||||
self.assertEqual(pp._data[0]['regex'], r'(?P<title>.+)\ \-\ (?P<artist>.+)')
|
||||
self.assertEqual(
|
||||
MetadataParserPP.format_to_regex('%(title)s - %(artist)s'),
|
||||
r'(?P<title>.+)\ \-\ (?P<artist>.+)')
|
||||
self.assertEqual(MetadataParserPP.format_to_regex(r'(?P<x>.+)'), r'(?P<x>.+)')
|
||||
|
||||
def test_field_to_outtmpl(self):
|
||||
pp = MetadataFromFieldPP(None, ['title:%(title)s : %(artist)s'])
|
||||
self.assertEqual(pp._data[0]['tmpl'], '%(title)s')
|
||||
def test_field_to_template(self):
|
||||
self.assertEqual(MetadataParserPP.field_to_template('title'), '%(title)s')
|
||||
self.assertEqual(MetadataParserPP.field_to_template('1'), '1')
|
||||
self.assertEqual(MetadataParserPP.field_to_template('foo bar'), 'foo bar')
|
||||
self.assertEqual(MetadataParserPP.field_to_template(' literal'), ' literal')
|
||||
|
||||
def test_in_out_seperation(self):
|
||||
pp = MetadataFromFieldPP(None, ['%(title)s \\: %(artist)s:%(title)s : %(artist)s'])
|
||||
self.assertEqual(pp._data[0]['in'], '%(title)s : %(artist)s')
|
||||
self.assertEqual(pp._data[0]['out'], '%(title)s : %(artist)s')
|
||||
|
||||
|
||||
class TestMetadataFromTitle(unittest.TestCase):
|
||||
def test_format_to_regex(self):
|
||||
pp = MetadataFromTitlePP(None, '%(title)s - %(artist)s')
|
||||
self.assertEqual(pp._titleregex, r'(?P<title>.+)\ \-\ (?P<artist>.+)')
|
||||
def test_metadatafromfield(self):
|
||||
self.assertEqual(
|
||||
MetadataFromFieldPP.to_action('%(title)s \\: %(artist)s:%(title)s : %(artist)s'),
|
||||
(MetadataParserPP.Actions.INTERPRET, '%(title)s : %(artist)s', '%(title)s : %(artist)s'))
|
||||
|
||||
|
||||
class TestConvertThumbnail(unittest.TestCase):
|
||||
@@ -60,12 +59,12 @@ class TestConvertThumbnail(unittest.TestCase):
|
||||
os.remove(file.format(out))
|
||||
|
||||
|
||||
class TestExecAfterDownload(unittest.TestCase):
|
||||
class TestExec(unittest.TestCase):
|
||||
def test_parse_cmd(self):
|
||||
pp = ExecAfterDownloadPP(YoutubeDL(), '')
|
||||
pp = ExecPP(YoutubeDL(), '')
|
||||
info = {'filepath': 'file name'}
|
||||
quoted_filepath = compat_shlex_quote(info['filepath'])
|
||||
cmd = 'echo %s' % compat_shlex_quote(info['filepath'])
|
||||
|
||||
self.assertEqual(pp.parse_cmd('echo', info), 'echo %s' % quoted_filepath)
|
||||
self.assertEqual(pp.parse_cmd('echo.{}', info), 'echo.%s' % quoted_filepath)
|
||||
self.assertEqual(pp.parse_cmd('echo "%(filepath)s"', info), 'echo "%s"' % info['filepath'])
|
||||
self.assertEqual(pp.parse_cmd('echo', info), cmd)
|
||||
self.assertEqual(pp.parse_cmd('echo {}', info), cmd)
|
||||
self.assertEqual(pp.parse_cmd('echo %(filepath)q', info), cmd)
|
||||
|
||||
@@ -1207,35 +1207,12 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
|
||||
'9999 51')
|
||||
|
||||
def test_match_str(self):
|
||||
self.assertRaises(ValueError, match_str, 'xy>foobar', {})
|
||||
# Unary
|
||||
self.assertFalse(match_str('xy', {'x': 1200}))
|
||||
self.assertTrue(match_str('!xy', {'x': 1200}))
|
||||
self.assertTrue(match_str('x', {'x': 1200}))
|
||||
self.assertFalse(match_str('!x', {'x': 1200}))
|
||||
self.assertTrue(match_str('x', {'x': 0}))
|
||||
self.assertFalse(match_str('x>0', {'x': 0}))
|
||||
self.assertFalse(match_str('x>0', {}))
|
||||
self.assertTrue(match_str('x>?0', {}))
|
||||
self.assertTrue(match_str('x>1K', {'x': 1200}))
|
||||
self.assertFalse(match_str('x>2K', {'x': 1200}))
|
||||
self.assertTrue(match_str('x>=1200 & x < 1300', {'x': 1200}))
|
||||
self.assertFalse(match_str('x>=1100 & x < 1200', {'x': 1200}))
|
||||
self.assertFalse(match_str('y=a212', {'y': 'foobar42'}))
|
||||
self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'}))
|
||||
self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'}))
|
||||
self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'}))
|
||||
self.assertFalse(match_str(
|
||||
'like_count > 100 & dislike_count <? 50 & description',
|
||||
{'like_count': 90, 'description': 'foo'}))
|
||||
self.assertTrue(match_str(
|
||||
'like_count > 100 & dislike_count <? 50 & description',
|
||||
{'like_count': 190, 'description': 'foo'}))
|
||||
self.assertFalse(match_str(
|
||||
'like_count > 100 & dislike_count <? 50 & description',
|
||||
{'like_count': 190, 'dislike_count': 60, 'description': 'foo'}))
|
||||
self.assertFalse(match_str(
|
||||
'like_count > 100 & dislike_count <? 50 & description',
|
||||
{'like_count': 190, 'dislike_count': 10}))
|
||||
self.assertTrue(match_str('is_live', {'is_live': True}))
|
||||
self.assertFalse(match_str('is_live', {'is_live': False}))
|
||||
self.assertFalse(match_str('is_live', {'is_live': None}))
|
||||
@@ -1249,6 +1226,69 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
|
||||
self.assertFalse(match_str('!title', {'title': 'abc'}))
|
||||
self.assertFalse(match_str('!title', {'title': ''}))
|
||||
|
||||
# Numeric
|
||||
self.assertFalse(match_str('x>0', {'x': 0}))
|
||||
self.assertFalse(match_str('x>0', {}))
|
||||
self.assertTrue(match_str('x>?0', {}))
|
||||
self.assertTrue(match_str('x>1K', {'x': 1200}))
|
||||
self.assertFalse(match_str('x>2K', {'x': 1200}))
|
||||
self.assertTrue(match_str('x>=1200 & x < 1300', {'x': 1200}))
|
||||
self.assertFalse(match_str('x>=1100 & x < 1200', {'x': 1200}))
|
||||
|
||||
# String
|
||||
self.assertFalse(match_str('y=a212', {'y': 'foobar42'}))
|
||||
self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'}))
|
||||
self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'}))
|
||||
self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'}))
|
||||
self.assertTrue(match_str('y^=foo', {'y': 'foobar42'}))
|
||||
self.assertFalse(match_str('y!^=foo', {'y': 'foobar42'}))
|
||||
self.assertFalse(match_str('y^=bar', {'y': 'foobar42'}))
|
||||
self.assertTrue(match_str('y!^=bar', {'y': 'foobar42'}))
|
||||
self.assertRaises(ValueError, match_str, 'x^=42', {'x': 42})
|
||||
self.assertTrue(match_str('y*=bar', {'y': 'foobar42'}))
|
||||
self.assertFalse(match_str('y!*=bar', {'y': 'foobar42'}))
|
||||
self.assertFalse(match_str('y*=baz', {'y': 'foobar42'}))
|
||||
self.assertTrue(match_str('y!*=baz', {'y': 'foobar42'}))
|
||||
self.assertTrue(match_str('y$=42', {'y': 'foobar42'}))
|
||||
self.assertFalse(match_str('y$=43', {'y': 'foobar42'}))
|
||||
|
||||
# And
|
||||
self.assertFalse(match_str(
|
||||
'like_count > 100 & dislike_count <? 50 & description',
|
||||
{'like_count': 90, 'description': 'foo'}))
|
||||
self.assertTrue(match_str(
|
||||
'like_count > 100 & dislike_count <? 50 & description',
|
||||
{'like_count': 190, 'description': 'foo'}))
|
||||
self.assertFalse(match_str(
|
||||
'like_count > 100 & dislike_count <? 50 & description',
|
||||
{'like_count': 190, 'dislike_count': 60, 'description': 'foo'}))
|
||||
self.assertFalse(match_str(
|
||||
'like_count > 100 & dislike_count <? 50 & description',
|
||||
{'like_count': 190, 'dislike_count': 10}))
|
||||
|
||||
# Regex
|
||||
self.assertTrue(match_str(r'x~=\bbar', {'x': 'foo bar'}))
|
||||
self.assertFalse(match_str(r'x~=\bbar.+', {'x': 'foo bar'}))
|
||||
self.assertFalse(match_str(r'x~=^FOO', {'x': 'foo bar'}))
|
||||
self.assertTrue(match_str(r'x~=(?i)^FOO', {'x': 'foo bar'}))
|
||||
|
||||
# Quotes
|
||||
self.assertTrue(match_str(r'x^="foo"', {'x': 'foo "bar"'}))
|
||||
self.assertFalse(match_str(r'x^="foo "', {'x': 'foo "bar"'}))
|
||||
self.assertFalse(match_str(r'x$="bar"', {'x': 'foo "bar"'}))
|
||||
self.assertTrue(match_str(r'x$=" \"bar\""', {'x': 'foo "bar"'}))
|
||||
|
||||
# Escaping &
|
||||
self.assertFalse(match_str(r'x=foo & bar', {'x': 'foo & bar'}))
|
||||
self.assertTrue(match_str(r'x=foo \& bar', {'x': 'foo & bar'}))
|
||||
self.assertTrue(match_str(r'x=foo \& bar & x^=foo', {'x': 'foo & bar'}))
|
||||
self.assertTrue(match_str(r'x="foo \& bar" & x^=foo', {'x': 'foo & bar'}))
|
||||
|
||||
# Example from docs
|
||||
self.assertTrue(
|
||||
r'!is_live & like_count>?100 & description~=\'(?i)\bcats \& dogs\b\'',
|
||||
{'description': 'Raining Cats & Dogs'})
|
||||
|
||||
def test_parse_dfxp_time_expr(self):
|
||||
self.assertEqual(parse_dfxp_time_expr(None), None)
|
||||
self.assertEqual(parse_dfxp_time_expr(''), None)
|
||||
|
||||
@@ -198,7 +198,8 @@ class YoutubeDL(object):
|
||||
(or video) as a single JSON line.
|
||||
force_write_download_archive: Force writing download archive regardless
|
||||
of 'skip_download' or 'simulate'.
|
||||
simulate: Do not download the video files.
|
||||
simulate: Do not download the video files. If unset (or None),
|
||||
simulate only if listsubtitles, listformats or list_thumbnails is used
|
||||
format: Video format code. see "FORMAT SELECTION" for more details.
|
||||
allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
|
||||
ignore_no_formats_error: Ignore "No video formats" error. Useful for
|
||||
@@ -219,7 +220,7 @@ class YoutubeDL(object):
|
||||
'temp' and the keys of OUTTMPL_TYPES (in utils.py)
|
||||
outtmpl: Dictionary of templates for output names. Allowed keys
|
||||
are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
|
||||
A string a also accepted for backward compatibility
|
||||
For compatibility with youtube-dl, a single string can also be used
|
||||
outtmpl_na_placeholder: Placeholder for unavailable meta fields.
|
||||
restrictfilenames: Do not allow "&" and spaces in file names
|
||||
trim_file_name: Limit length of filename (extension excluded)
|
||||
@@ -233,6 +234,8 @@ class YoutubeDL(object):
|
||||
overwrites: Overwrite all video and metadata files if True,
|
||||
overwrite only non-video files if None
|
||||
and don't overwrite any file if False
|
||||
For compatibility with youtube-dl,
|
||||
"nooverwrites" may also be used instead
|
||||
playliststart: Playlist item to start at.
|
||||
playlistend: Playlist item to end at.
|
||||
playlist_items: Specific indices of playlist to download.
|
||||
@@ -245,7 +248,7 @@ class YoutubeDL(object):
|
||||
writedescription: Write the video description to a .description file
|
||||
writeinfojson: Write the video description to a .info.json file
|
||||
clean_infojson: Remove private fields from the infojson
|
||||
writecomments: Extract video comments. This will not be written to disk
|
||||
getcomments: Extract video comments. This will not be written to disk
|
||||
unless writeinfojson is also given
|
||||
writeannotations: Write the video annotations to a .annotations.xml file
|
||||
writethumbnail: Write the thumbnail image to a file
|
||||
@@ -404,7 +407,7 @@ class YoutubeDL(object):
|
||||
compat_opts: Compatibility options. See "Differences in default behavior".
|
||||
The following options do not work when used through the API:
|
||||
filename, abort-on-error, multistreams, no-live-chat,
|
||||
no-clean-infojson, no-playlist-metafiles.
|
||||
no-clean-infojson, no-playlist-metafiles, no-keep-subs.
|
||||
Refer __init__.py for their implementation
|
||||
|
||||
The following parameters are not used by YoutubeDL itself, they are used by
|
||||
@@ -419,10 +422,12 @@ class YoutubeDL(object):
|
||||
ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
|
||||
to the binary or its containing directory.
|
||||
postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
|
||||
and a list of additional command-line arguments for the
|
||||
postprocessor/executable. The dict can also have "PP+EXE" keys
|
||||
which are used when the given exe is used by the given PP.
|
||||
Use 'default' as the name for arguments to be passed to all PP
|
||||
and a list of additional command-line arguments for the
|
||||
postprocessor/executable. The dict can also have "PP+EXE" keys
|
||||
which are used when the given exe is used by the given PP.
|
||||
Use 'default' as the name for arguments to be passed to all PP
|
||||
For compatibility with youtube-dl, a single list of args
|
||||
can also be used
|
||||
|
||||
The following options are used by the extractors:
|
||||
extractor_retries: Number of times to retry for known errors
|
||||
@@ -514,8 +519,15 @@ class YoutubeDL(object):
|
||||
self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
|
||||
self.params['merge_output_format'] = self.params['final_ext']
|
||||
|
||||
if 'overwrites' in self.params and self.params['overwrites'] is None:
|
||||
del self.params['overwrites']
|
||||
if self.params.get('overwrites') is None:
|
||||
self.params.pop('overwrites', None)
|
||||
elif self.params.get('nooverwrites') is not None:
|
||||
# nooverwrites was unnecessarily changed to overwrites
|
||||
# in 0c3d0f51778b153f65c21906031c2e091fcfb641
|
||||
# This ensures compatibility with both keys
|
||||
self.params['overwrites'] = not self.params['nooverwrites']
|
||||
else:
|
||||
self.params['nooverwrites'] = not self.params['overwrites']
|
||||
|
||||
if params.get('bidi_workaround', False):
|
||||
try:
|
||||
@@ -706,7 +718,7 @@ class YoutubeDL(object):
|
||||
def save_console_title(self):
|
||||
if not self.params.get('consoletitle', False):
|
||||
return
|
||||
if self.params.get('simulate', False):
|
||||
if self.params.get('simulate'):
|
||||
return
|
||||
if compat_os_name != 'nt' and 'TERM' in os.environ:
|
||||
# Save the title on stack
|
||||
@@ -715,7 +727,7 @@ class YoutubeDL(object):
|
||||
def restore_console_title(self):
|
||||
if not self.params.get('consoletitle', False):
|
||||
return
|
||||
if self.params.get('simulate', False):
|
||||
if self.params.get('simulate'):
|
||||
return
|
||||
if compat_os_name != 'nt' and 'TERM' in os.environ:
|
||||
# Restore the title from stack
|
||||
@@ -887,14 +899,15 @@ class YoutubeDL(object):
|
||||
|
||||
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
|
||||
""" Make the template and info_dict suitable for substitution : ydl.outtmpl_escape(outtmpl) % info_dict """
|
||||
info_dict = dict(info_dict)
|
||||
na = self.params.get('outtmpl_na_placeholder', 'NA')
|
||||
info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set
|
||||
|
||||
info_dict = dict(info_dict) # Do not sanitize so as not to consume LazyList
|
||||
for key in ('__original_infodict', '__postprocessors'):
|
||||
info_dict.pop(key, None)
|
||||
info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
|
||||
formatSeconds(info_dict['duration'], '-' if sanitize else ':')
|
||||
if info_dict.get('duration', None) is not None
|
||||
else None)
|
||||
info_dict['epoch'] = int(time.time())
|
||||
info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
|
||||
if info_dict.get('resolution') is None:
|
||||
info_dict['resolution'] = self.format_resolution(info_dict, default=None)
|
||||
@@ -914,7 +927,7 @@ class YoutubeDL(object):
|
||||
}
|
||||
# Field is of the form key1.key2...
|
||||
# where keys (except first) can be string, int or slice
|
||||
FIELD_RE = r'\w+(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
|
||||
FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
|
||||
MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
|
||||
MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
|
||||
INTERNAL_FORMAT_RE = re.compile(r'''(?x)
|
||||
@@ -925,12 +938,15 @@ class YoutubeDL(object):
|
||||
(?:\|(?P<default>.*?))?
|
||||
$'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
|
||||
|
||||
get_key = lambda k: traverse_obj(
|
||||
info_dict, k.split('.'), is_user_input=True, traverse_string=True)
|
||||
def _traverse_infodict(k):
|
||||
k = k.split('.')
|
||||
if k[0] == '':
|
||||
k.pop(0)
|
||||
return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
|
||||
|
||||
def get_value(mdict):
|
||||
# Object traversal
|
||||
value = get_key(mdict['fields'])
|
||||
value = _traverse_infodict(mdict['fields'])
|
||||
# Negative
|
||||
if mdict['negate']:
|
||||
value = float_or_none(value)
|
||||
@@ -952,7 +968,7 @@ class YoutubeDL(object):
|
||||
item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
|
||||
offset = float_or_none(item)
|
||||
if offset is None:
|
||||
offset = float_or_none(get_key(item))
|
||||
offset = float_or_none(_traverse_infodict(item))
|
||||
try:
|
||||
value = operator(value, multiplier * offset)
|
||||
except (TypeError, ZeroDivisionError):
|
||||
@@ -964,13 +980,17 @@ class YoutubeDL(object):
|
||||
|
||||
return value
|
||||
|
||||
na = self.params.get('outtmpl_na_placeholder', 'NA')
|
||||
|
||||
def _dumpjson_default(obj):
|
||||
if isinstance(obj, (set, LazyList)):
|
||||
return list(obj)
|
||||
raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')
|
||||
|
||||
def create_key(outer_mobj):
|
||||
if not outer_mobj.group('has_key'):
|
||||
return f'%{outer_mobj.group(0)}'
|
||||
|
||||
prefix = outer_mobj.group('prefix')
|
||||
key = outer_mobj.group('key')
|
||||
original_fmt = fmt = outer_mobj.group('format')
|
||||
mobj = re.match(INTERNAL_FORMAT_RE, key)
|
||||
if mobj is None:
|
||||
value, default, mobj = None, na, {'fields': ''}
|
||||
@@ -979,6 +999,7 @@ class YoutubeDL(object):
|
||||
default = mobj['default'] if mobj['default'] is not None else na
|
||||
value = get_value(mobj)
|
||||
|
||||
fmt = outer_mobj.group('format')
|
||||
if fmt == 's' and value is not None and key in field_size_compat_map.keys():
|
||||
fmt = '0{:d}d'.format(field_size_compat_map[key])
|
||||
|
||||
@@ -988,7 +1009,7 @@ class YoutubeDL(object):
|
||||
if fmt[-1] == 'l':
|
||||
value, fmt = ', '.join(variadic(value)), str_fmt
|
||||
elif fmt[-1] == 'j':
|
||||
value, fmt = json.dumps(value), str_fmt
|
||||
value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
|
||||
elif fmt[-1] == 'q':
|
||||
value, fmt = compat_shlex_quote(str(value)), str_fmt
|
||||
elif fmt[-1] == 'c':
|
||||
@@ -1010,9 +1031,9 @@ class YoutubeDL(object):
|
||||
if fmt[-1] in 'csr':
|
||||
value = sanitize(mobj['fields'].split('.')[-1], value)
|
||||
|
||||
key = '%s\0%s' % (key.replace('%', '%\0'), original_fmt)
|
||||
key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
|
||||
TMPL_DICT[key] = value
|
||||
return f'{prefix}%({key}){fmt}'
|
||||
return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
|
||||
|
||||
return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
|
||||
|
||||
@@ -1058,7 +1079,6 @@ class YoutubeDL(object):
|
||||
self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
|
||||
elif os.path.isabs(filename):
|
||||
self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
|
||||
self.__prepare_filename_warned = True
|
||||
if filename == '-' or not filename:
|
||||
return filename
|
||||
|
||||
@@ -1261,7 +1281,7 @@ class YoutubeDL(object):
|
||||
ie_result = self.process_video_result(ie_result, download=download)
|
||||
additional_urls = (ie_result or {}).get('additional_urls')
|
||||
if additional_urls:
|
||||
# TODO: Improve MetadataFromFieldPP to allow setting a list
|
||||
# TODO: Improve MetadataParserPP to allow setting a list
|
||||
if isinstance(additional_urls, compat_str):
|
||||
additional_urls = [additional_urls]
|
||||
self.to_screen(
|
||||
@@ -1337,15 +1357,12 @@ class YoutubeDL(object):
|
||||
'It needs to be updated.' % ie_result.get('extractor'))
|
||||
|
||||
def _fixup(r):
|
||||
self.add_extra_info(
|
||||
r,
|
||||
{
|
||||
'extractor': ie_result['extractor'],
|
||||
'webpage_url': ie_result['webpage_url'],
|
||||
'webpage_url_basename': url_basename(ie_result['webpage_url']),
|
||||
'extractor_key': ie_result['extractor_key'],
|
||||
}
|
||||
)
|
||||
self.add_extra_info(r, {
|
||||
'extractor': ie_result['extractor'],
|
||||
'webpage_url': ie_result['webpage_url'],
|
||||
'webpage_url_basename': url_basename(ie_result['webpage_url']),
|
||||
'extractor_key': ie_result['extractor_key'],
|
||||
})
|
||||
return r
|
||||
ie_result['entries'] = [
|
||||
self.process_ie_result(_fixup(r), download, extra_info)
|
||||
@@ -1461,7 +1478,7 @@ class YoutubeDL(object):
|
||||
else:
|
||||
self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
|
||||
try:
|
||||
write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
|
||||
write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
|
||||
except (OSError, IOError):
|
||||
self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
|
||||
|
||||
@@ -1609,7 +1626,7 @@ class YoutubeDL(object):
|
||||
return merger.available and merger.can_merge()
|
||||
|
||||
prefer_best = (
|
||||
not self.params.get('simulate', False)
|
||||
not self.params.get('simulate')
|
||||
and download
|
||||
and (
|
||||
not can_merge()
|
||||
@@ -2182,7 +2199,7 @@ class YoutubeDL(object):
|
||||
format['format'] = '{id} - {res}{note}'.format(
|
||||
id=format['format_id'],
|
||||
res=self.format_resolution(format),
|
||||
note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
|
||||
note=format_field(format, 'format_note', ' (%s)'),
|
||||
)
|
||||
# Automatically determine file extension if missing
|
||||
if format.get('ext') is None:
|
||||
@@ -2211,20 +2228,22 @@ class YoutubeDL(object):
|
||||
|
||||
info_dict, _ = self.pre_process(info_dict)
|
||||
|
||||
list_only = self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles')
|
||||
if self.params.get('list_thumbnails'):
|
||||
self.list_thumbnails(info_dict)
|
||||
if self.params.get('listformats'):
|
||||
if not info_dict.get('formats'):
|
||||
raise ExtractorError('No video formats found', expected=True)
|
||||
self.list_formats(info_dict)
|
||||
if self.params.get('listsubtitles'):
|
||||
if 'automatic_captions' in info_dict:
|
||||
self.list_subtitles(
|
||||
info_dict['id'], automatic_captions, 'automatic captions')
|
||||
self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
|
||||
list_only = self.params.get('simulate') is None and (
|
||||
self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
|
||||
if list_only:
|
||||
# Without this printing, -F --print-json will not work
|
||||
self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
|
||||
if self.params.get('list_thumbnails'):
|
||||
self.list_thumbnails(info_dict)
|
||||
if self.params.get('listformats'):
|
||||
if not info_dict.get('formats'):
|
||||
raise ExtractorError('No video formats found', expected=True)
|
||||
self.list_formats(info_dict)
|
||||
if self.params.get('listsubtitles'):
|
||||
if 'automatic_captions' in info_dict:
|
||||
self.list_subtitles(
|
||||
info_dict['id'], automatic_captions, 'automatic captions')
|
||||
self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
|
||||
return
|
||||
|
||||
format_selector = self.format_selector
|
||||
@@ -2320,7 +2339,8 @@ class YoutubeDL(object):
|
||||
requested_langs = ['en']
|
||||
else:
|
||||
requested_langs = [list(all_sub_langs)[0]]
|
||||
self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
|
||||
if requested_langs:
|
||||
self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
|
||||
|
||||
formats_query = self.params.get('subtitlesformat', 'best')
|
||||
formats_preference = formats_query.split('/') if formats_query else []
|
||||
@@ -2368,6 +2388,8 @@ class YoutubeDL(object):
|
||||
elif 'url' in info_dict:
|
||||
info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
|
||||
|
||||
if self.params.get('forceprint') or self.params.get('forcejson'):
|
||||
self.post_extract(info_dict)
|
||||
for tmpl in self.params.get('forceprint', []):
|
||||
if re.match(r'\w+$', tmpl):
|
||||
tmpl = '%({})s'.format(tmpl)
|
||||
@@ -2380,13 +2402,12 @@ class YoutubeDL(object):
|
||||
print_optional('thumbnail')
|
||||
print_optional('description')
|
||||
print_optional('filename')
|
||||
if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
|
||||
if self.params.get('forceduration') and info_dict.get('duration') is not None:
|
||||
self.to_stdout(formatSeconds(info_dict['duration']))
|
||||
print_mandatory('format')
|
||||
|
||||
if self.params.get('forcejson', False):
|
||||
self.post_extract(info_dict)
|
||||
self.to_stdout(json.dumps(info_dict, default=repr))
|
||||
if self.params.get('forcejson'):
|
||||
self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
|
||||
|
||||
def dl(self, name, info, subtitle=False, test=False):
|
||||
|
||||
@@ -2421,8 +2442,6 @@ class YoutubeDL(object):
|
||||
|
||||
assert info_dict.get('_type', 'video') == 'video'
|
||||
|
||||
info_dict.setdefault('__postprocessors', [])
|
||||
|
||||
max_downloads = self.params.get('max_downloads')
|
||||
if max_downloads is not None:
|
||||
if self._num_downloads >= int(max_downloads):
|
||||
@@ -2448,7 +2467,7 @@ class YoutubeDL(object):
|
||||
# Forced printings
|
||||
self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
|
||||
|
||||
if self.params.get('simulate', False):
|
||||
if self.params.get('simulate'):
|
||||
if self.params.get('force_write_download_archive', False):
|
||||
self.record_download_archive(info_dict)
|
||||
|
||||
@@ -2548,7 +2567,7 @@ class YoutubeDL(object):
|
||||
else:
|
||||
self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
|
||||
try:
|
||||
write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)
|
||||
write_json_file(self.sanitize_info(info_dict, self.params.get('clean_infojson', True)), infofn)
|
||||
except (OSError, IOError):
|
||||
self.report_error('Cannot write video metadata to JSON file ' + infofn)
|
||||
return
|
||||
@@ -2623,6 +2642,7 @@ class YoutubeDL(object):
|
||||
info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
|
||||
else:
|
||||
# Download
|
||||
info_dict.setdefault('__postprocessors', [])
|
||||
try:
|
||||
|
||||
def existing_file(*filepaths):
|
||||
@@ -2675,16 +2695,17 @@ class YoutubeDL(object):
|
||||
info_dict['ext'] = 'mkv'
|
||||
self.report_warning(
|
||||
'Requested formats are incompatible for merge and will be merged into mkv.')
|
||||
new_ext = info_dict['ext']
|
||||
|
||||
def correct_ext(filename):
|
||||
def correct_ext(filename, ext=new_ext):
|
||||
if filename == '-':
|
||||
return filename
|
||||
filename_real_ext = os.path.splitext(filename)[1][1:]
|
||||
filename_wo_ext = (
|
||||
os.path.splitext(filename)[0]
|
||||
if filename_real_ext == old_ext
|
||||
if filename_real_ext in (old_ext, new_ext)
|
||||
else filename)
|
||||
return '%s.%s' % (filename_wo_ext, info_dict['ext'])
|
||||
return '%s.%s' % (filename_wo_ext, ext)
|
||||
|
||||
# Ensure filename always has a correct extension for successful merge
|
||||
full_filename = correct_ext(full_filename)
|
||||
@@ -2729,7 +2750,9 @@ class YoutubeDL(object):
|
||||
del new_info['requested_formats']
|
||||
new_info.update(f)
|
||||
if temp_filename != '-':
|
||||
fname = prepend_extension(temp_filename, 'f%s' % f['format_id'], new_info['ext'])
|
||||
fname = prepend_extension(
|
||||
correct_ext(temp_filename, new_info['ext']),
|
||||
'f%s' % f['format_id'], new_info['ext'])
|
||||
if not self._ensure_dir_exists(fname):
|
||||
return
|
||||
downloaded.append(fname)
|
||||
@@ -2858,7 +2881,7 @@ class YoutubeDL(object):
|
||||
else:
|
||||
if self.params.get('dump_single_json', False):
|
||||
self.post_extract(res)
|
||||
self.to_stdout(json.dumps(res, default=repr))
|
||||
self.to_stdout(json.dumps(self.sanitize_info(res)))
|
||||
|
||||
return self._download_retcode
|
||||
|
||||
@@ -2867,7 +2890,7 @@ class YoutubeDL(object):
|
||||
[info_filename], mode='r',
|
||||
openhook=fileinput.hook_encoded('utf-8'))) as f:
|
||||
# FileInput doesn't have a read method, we can't call json.load
|
||||
info = self.filter_requested_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
|
||||
info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
|
||||
try:
|
||||
self.process_ie_result(info, download=True)
|
||||
except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
|
||||
@@ -2880,16 +2903,20 @@ class YoutubeDL(object):
|
||||
return self._download_retcode
|
||||
|
||||
@staticmethod
|
||||
def filter_requested_info(info_dict, actually_filter=True):
|
||||
remove_keys = ['__original_infodict'] # Always remove this since this may contain a copy of the entire dict
|
||||
def sanitize_info(info_dict, remove_private_keys=False):
|
||||
''' Sanitize the infodict for converting to json '''
|
||||
info_dict.setdefault('epoch', int(time.time()))
|
||||
remove_keys = {'__original_infodict'} # Always remove this since this may contain a copy of the entire dict
|
||||
keep_keys = ['_type'], # Always keep this to facilitate load-info-json
|
||||
if actually_filter:
|
||||
remove_keys += ('requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries', 'original_url')
|
||||
if remove_private_keys:
|
||||
remove_keys |= {
|
||||
'requested_formats', 'requested_subtitles', 'requested_entries',
|
||||
'filepath', 'entries', 'original_url', 'playlist_autonumber',
|
||||
}
|
||||
empty_values = (None, {}, [], set(), tuple())
|
||||
reject = lambda k, v: k not in keep_keys and (
|
||||
k.startswith('_') or k in remove_keys or v in empty_values)
|
||||
else:
|
||||
info_dict['epoch'] = int(time.time())
|
||||
reject = lambda k, v: k in remove_keys
|
||||
filter_fn = lambda obj: (
|
||||
list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
|
||||
@@ -2897,6 +2924,11 @@ class YoutubeDL(object):
|
||||
else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
|
||||
return filter_fn(info_dict)
|
||||
|
||||
@staticmethod
|
||||
def filter_requested_info(info_dict, actually_filter=True):
|
||||
''' Alias of sanitize_info for backward compatibility '''
|
||||
return YoutubeDL.sanitize_info(info_dict, actually_filter)
|
||||
|
||||
def run_pp(self, pp, infodict):
|
||||
files_to_delete = []
|
||||
if '__files_to_move' not in infodict:
|
||||
@@ -3161,11 +3193,6 @@ class YoutubeDL(object):
|
||||
if not self.params.get('verbose'):
|
||||
return
|
||||
|
||||
if type('') is not compat_str:
|
||||
# Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
|
||||
self.report_warning(
|
||||
'Your Python is broken! Update to a newer and supported version')
|
||||
|
||||
stdout_encoding = getattr(
|
||||
sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
|
||||
encoding_str = (
|
||||
@@ -3221,14 +3248,24 @@ class YoutubeDL(object):
|
||||
exe_versions['rtmpdump'] = rtmpdump_version()
|
||||
exe_versions['phantomjs'] = PhantomJSwrapper._version()
|
||||
exe_str = ', '.join(
|
||||
'%s %s' % (exe, v)
|
||||
for exe, v in sorted(exe_versions.items())
|
||||
if v
|
||||
)
|
||||
if not exe_str:
|
||||
exe_str = 'none'
|
||||
f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
|
||||
) or 'none'
|
||||
self._write_string('[debug] exe versions: %s\n' % exe_str)
|
||||
|
||||
from .downloader.fragment import can_decrypt_frag
|
||||
from .downloader.websocket import has_websockets
|
||||
from .postprocessor.embedthumbnail import has_mutagen
|
||||
from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE
|
||||
|
||||
lib_str = ', '.join(sorted(filter(None, (
|
||||
can_decrypt_frag and 'pycryptodome',
|
||||
has_websockets and 'websockets',
|
||||
has_mutagen and 'mutagen',
|
||||
SQLITE_AVAILABLE and 'sqlite',
|
||||
KEYRING_AVAILABLE and 'keyring',
|
||||
)))) or 'none'
|
||||
self._write_string('[debug] Optional libraries: %s\n' % lib_str)
|
||||
|
||||
proxy_map = {}
|
||||
for handler in self._opener.handlers:
|
||||
if hasattr(handler, 'proxies'):
|
||||
|
||||
@@ -7,6 +7,7 @@ __license__ = 'Public Domain'
|
||||
|
||||
import codecs
|
||||
import io
|
||||
import itertools
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
@@ -18,6 +19,7 @@ from .options import (
|
||||
)
|
||||
from .compat import (
|
||||
compat_getpass,
|
||||
compat_shlex_quote,
|
||||
workaround_optparse_bug9161,
|
||||
)
|
||||
from .cookies import SUPPORTED_BROWSERS
|
||||
@@ -46,14 +48,15 @@ from .downloader import (
|
||||
from .extractor import gen_extractors, list_extractors
|
||||
from .extractor.common import InfoExtractor
|
||||
from .extractor.adobepass import MSO_INFO
|
||||
from .postprocessor.ffmpeg import (
|
||||
from .postprocessor import (
|
||||
FFmpegExtractAudioPP,
|
||||
FFmpegSubtitlesConvertorPP,
|
||||
FFmpegThumbnailsConvertorPP,
|
||||
FFmpegVideoConvertorPP,
|
||||
FFmpegVideoRemuxerPP,
|
||||
MetadataFromFieldPP,
|
||||
MetadataParserPP,
|
||||
)
|
||||
from .postprocessor.metadatafromfield import MetadataFromFieldPP
|
||||
from .YoutubeDL import YoutubeDL
|
||||
|
||||
|
||||
@@ -280,7 +283,7 @@ def _real_main(argv=None):
|
||||
'filename', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles',
|
||||
'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge',
|
||||
'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-attach-info-json',
|
||||
'embed-thumbnail-atomicparsley', 'seperate-video-versions', 'no-clean-infojson',
|
||||
'embed-thumbnail-atomicparsley', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs',
|
||||
]
|
||||
compat_opts = parse_compat_opts()
|
||||
|
||||
@@ -317,7 +320,7 @@ def _real_main(argv=None):
|
||||
outtmpl_default = opts.outtmpl.get('default')
|
||||
if 'filename' in compat_opts:
|
||||
if outtmpl_default is None:
|
||||
outtmpl_default = '%(title)s.%(id)s.%(ext)s'
|
||||
outtmpl_default = '%(title)s-%(id)s.%(ext)s'
|
||||
opts.outtmpl.update({'default': outtmpl_default})
|
||||
else:
|
||||
_unused_compat_opt('filename')
|
||||
@@ -329,7 +332,8 @@ def _real_main(argv=None):
|
||||
|
||||
for k, tmpl in opts.outtmpl.items():
|
||||
validate_outtmpl(tmpl, '%s output template' % k)
|
||||
for tmpl in opts.forceprint:
|
||||
opts.forceprint = opts.forceprint or []
|
||||
for tmpl in opts.forceprint or []:
|
||||
validate_outtmpl(tmpl, 'print template')
|
||||
|
||||
if opts.extractaudio and not opts.keepvideo and opts.format is None:
|
||||
@@ -344,13 +348,29 @@ def _real_main(argv=None):
|
||||
if re.match(InfoExtractor.FormatSort.regex, f) is None:
|
||||
parser.error('invalid format sort string "%s" specified' % f)
|
||||
|
||||
if opts.metafromfield is None:
|
||||
opts.metafromfield = []
|
||||
def metadataparser_actions(f):
|
||||
if isinstance(f, str):
|
||||
cmd = '--parse-metadata %s' % compat_shlex_quote(f)
|
||||
try:
|
||||
actions = [MetadataFromFieldPP.to_action(f)]
|
||||
except Exception as err:
|
||||
parser.error(f'{cmd} is invalid; {err}')
|
||||
else:
|
||||
cmd = '--replace-in-metadata %s' % ' '.join(map(compat_shlex_quote, f))
|
||||
actions = ((MetadataParserPP.Actions.REPLACE, x, *f[1:]) for x in f[0].split(','))
|
||||
|
||||
for action in actions:
|
||||
try:
|
||||
MetadataParserPP.validate_action(*action)
|
||||
except Exception as err:
|
||||
parser.error(f'{cmd} is invalid; {err}')
|
||||
yield action
|
||||
|
||||
if opts.parse_metadata is None:
|
||||
opts.parse_metadata = []
|
||||
if opts.metafromtitle is not None:
|
||||
opts.metafromfield.append('title:%s' % opts.metafromtitle)
|
||||
for f in opts.metafromfield:
|
||||
if re.match(MetadataFromFieldPP.regex, f) is None:
|
||||
parser.error('invalid format string "%s" specified for --parse-metadata' % f)
|
||||
opts.parse_metadata.append('title:%s' % opts.metafromtitle)
|
||||
opts.parse_metadata = list(itertools.chain(*map(metadataparser_actions, opts.parse_metadata)))
|
||||
|
||||
any_getting = opts.forceprint or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
|
||||
any_printing = opts.print_json
|
||||
@@ -402,10 +422,10 @@ def _real_main(argv=None):
|
||||
|
||||
# PostProcessors
|
||||
postprocessors = []
|
||||
if opts.metafromfield:
|
||||
if opts.parse_metadata:
|
||||
postprocessors.append({
|
||||
'key': 'MetadataFromField',
|
||||
'formats': opts.metafromfield,
|
||||
'key': 'MetadataParser',
|
||||
'actions': opts.parse_metadata,
|
||||
# Run this immediately after extraction is complete
|
||||
'when': 'pre_process'
|
||||
})
|
||||
@@ -426,7 +446,7 @@ def _real_main(argv=None):
|
||||
# Must be after all other before_dl
|
||||
if opts.exec_before_dl_cmd:
|
||||
postprocessors.append({
|
||||
'key': 'ExecAfterDownload',
|
||||
'key': 'Exec',
|
||||
'exec_cmd': opts.exec_before_dl_cmd,
|
||||
'when': 'before_dl'
|
||||
})
|
||||
@@ -458,13 +478,13 @@ def _real_main(argv=None):
|
||||
if opts.addmetadata:
|
||||
postprocessors.append({'key': 'FFmpegMetadata'})
|
||||
if opts.embedsubtitles:
|
||||
already_have_subtitle = opts.writesubtitles
|
||||
already_have_subtitle = opts.writesubtitles and 'no-keep-subs' not in compat_opts
|
||||
postprocessors.append({
|
||||
'key': 'FFmpegEmbedSubtitle',
|
||||
# already_have_subtitle = True prevents the file from being deleted after embedding
|
||||
'already_have_subtitle': already_have_subtitle
|
||||
})
|
||||
if not already_have_subtitle:
|
||||
if not opts.writeautomaticsub and 'no-keep-subs' not in compat_opts:
|
||||
opts.writesubtitles = True
|
||||
# --all-sub automatically sets --write-sub if --write-auto-sub is not given
|
||||
# this was the old behaviour if only --all-sub was given.
|
||||
@@ -497,10 +517,10 @@ def _real_main(argv=None):
|
||||
# XAttrMetadataPP should be run after post-processors that may change file contents
|
||||
if opts.xattrs:
|
||||
postprocessors.append({'key': 'XAttrMetadata'})
|
||||
# ExecAfterDownload must be the last PP
|
||||
# Exec must be the last PP
|
||||
if opts.exec_cmd:
|
||||
postprocessors.append({
|
||||
'key': 'ExecAfterDownload',
|
||||
'key': 'Exec',
|
||||
'exec_cmd': opts.exec_cmd,
|
||||
# Run this only after the files have been moved to their final locations
|
||||
'when': 'after_move'
|
||||
@@ -550,7 +570,7 @@ def _real_main(argv=None):
|
||||
'forcejson': opts.dumpjson or opts.print_json,
|
||||
'dump_single_json': opts.dump_single_json,
|
||||
'force_write_download_archive': opts.force_write_download_archive,
|
||||
'simulate': opts.simulate or any_getting,
|
||||
'simulate': (any_getting or None) if opts.simulate is None else opts.simulate,
|
||||
'skip_download': opts.skip_download,
|
||||
'format': opts.format,
|
||||
'allow_unplayable_formats': opts.allow_unplayable_formats,
|
||||
@@ -734,6 +754,11 @@ def main(argv=None):
|
||||
sys.exit('ERROR: fixed output name but more than one file to download')
|
||||
except KeyboardInterrupt:
|
||||
sys.exit('\nERROR: Interrupted by user')
|
||||
except BrokenPipeError:
|
||||
# https://docs.python.org/3/library/signal.html#note-on-sigpipe
|
||||
devnull = os.open(os.devnull, os.O_WRONLY)
|
||||
os.dup2(devnull, sys.stdout.fileno())
|
||||
sys.exit(r'\nERROR: {err}')
|
||||
|
||||
|
||||
__all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors']
|
||||
|
||||
@@ -47,8 +47,11 @@ class FileDownloader(object):
|
||||
min_filesize: Skip files smaller than this size
|
||||
max_filesize: Skip files larger than this size
|
||||
xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
|
||||
external_downloader_args: A list of additional command-line arguments for the
|
||||
external downloader.
|
||||
external_downloader_args: A dictionary of downloader keys (in lower case)
|
||||
and a list of additional command-line arguments for the
|
||||
executable. Use 'default' as the name for arguments to be
|
||||
passed to all downloaders. For compatibility with youtube-dl,
|
||||
a single list of args can also be used
|
||||
hls_use_mpegts: Use the mpegts container for HLS videos.
|
||||
http_chunk_size: Size of a chunk for chunk-based HTTP downloading. May be
|
||||
useful for bypassing bandwidth throttling imposed by
|
||||
@@ -320,12 +323,9 @@ class FileDownloader(object):
|
||||
'[download] Got server HTTP error: %s. Retrying (attempt %d of %s) ...'
|
||||
% (error_to_compat_str(err), count, self.format_retries(retries)))
|
||||
|
||||
def report_file_already_downloaded(self, file_name):
|
||||
def report_file_already_downloaded(self, *args, **kwargs):
|
||||
"""Report file has already been fully downloaded."""
|
||||
try:
|
||||
self.to_screen('[download] %s has already been downloaded' % file_name)
|
||||
except UnicodeEncodeError:
|
||||
self.to_screen('[download] The file has already been downloaded')
|
||||
return self.ydl.report_file_already_downloaded(*args, **kwargs)
|
||||
|
||||
def report_unable_to_resume(self):
|
||||
"""Report it was impossible to resume download."""
|
||||
@@ -343,7 +343,7 @@ class FileDownloader(object):
|
||||
"""
|
||||
|
||||
nooverwrites_and_exists = (
|
||||
not self.params.get('overwrites', subtitle)
|
||||
not self.params.get('overwrites', True)
|
||||
and os.path.exists(encodeFilename(filename))
|
||||
)
|
||||
|
||||
|
||||
@@ -382,6 +382,9 @@ class FFmpegFD(ExternalFD):
|
||||
if not self.params.get('verbose'):
|
||||
args += ['-hide_banner']
|
||||
|
||||
args += info_dict.get('_ffmpeg_args', [])
|
||||
|
||||
# This option exists only for compatibility. Extractors should use `_ffmpeg_args` instead
|
||||
seekable = info_dict.get('_seekable')
|
||||
if seekable is not None:
|
||||
# setting -seekable prevents ffmpeg from guessing if the server
|
||||
@@ -470,6 +473,7 @@ class FFmpegFD(ExternalFD):
|
||||
if self.params.get('test', False):
|
||||
args += ['-fs', compat_str(self._TEST_FILE_SIZE)]
|
||||
|
||||
ext = info_dict['ext']
|
||||
if protocol in ('m3u8', 'm3u8_native'):
|
||||
use_mpegts = (tmpfilename == '-') or self.params.get('hls_use_mpegts')
|
||||
if use_mpegts is None:
|
||||
@@ -482,8 +486,10 @@ class FFmpegFD(ExternalFD):
|
||||
args += ['-bsf:a', 'aac_adtstoasc']
|
||||
elif protocol == 'rtmp':
|
||||
args += ['-f', 'flv']
|
||||
elif ext == 'mp4' and tmpfilename == '-':
|
||||
args += ['-f', 'mpegts']
|
||||
else:
|
||||
args += ['-f', EXT_TO_OUT_FORMATS.get(info_dict['ext'], info_dict['ext'])]
|
||||
args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)]
|
||||
|
||||
args = [encodeArgument(opt) for opt in args]
|
||||
args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
|
||||
|
||||
@@ -105,17 +105,19 @@ class FragmentFD(FileDownloader):
|
||||
|
||||
def _write_ytdl_file(self, ctx):
|
||||
frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w')
|
||||
downloader = {
|
||||
'current_fragment': {
|
||||
'index': ctx['fragment_index'],
|
||||
},
|
||||
}
|
||||
if 'extra_state' in ctx:
|
||||
downloader['extra_state'] = ctx['extra_state']
|
||||
if ctx.get('fragment_count') is not None:
|
||||
downloader['fragment_count'] = ctx['fragment_count']
|
||||
frag_index_stream.write(json.dumps({'downloader': downloader}))
|
||||
frag_index_stream.close()
|
||||
try:
|
||||
downloader = {
|
||||
'current_fragment': {
|
||||
'index': ctx['fragment_index'],
|
||||
},
|
||||
}
|
||||
if 'extra_state' in ctx:
|
||||
downloader['extra_state'] = ctx['extra_state']
|
||||
if ctx.get('fragment_count') is not None:
|
||||
downloader['fragment_count'] = ctx['fragment_count']
|
||||
frag_index_stream.write(json.dumps({'downloader': downloader}))
|
||||
finally:
|
||||
frag_index_stream.close()
|
||||
|
||||
def _download_fragment(self, ctx, frag_url, info_dict, headers=None, request_data=None):
|
||||
fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
|
||||
@@ -327,7 +329,7 @@ class FragmentFD(FileDownloader):
|
||||
'fragment_index': 0,
|
||||
})
|
||||
|
||||
def download_and_append_fragments(self, ctx, fragments, info_dict, pack_func=None):
|
||||
def download_and_append_fragments(self, ctx, fragments, info_dict, *, pack_func=None, finish_func=None):
|
||||
fragment_retries = self.params.get('fragment_retries', 0)
|
||||
is_fatal = (lambda idx: idx == 0) if self.params.get('skip_unavailable_fragments', True) else (lambda _: True)
|
||||
if not pack_func:
|
||||
@@ -422,5 +424,8 @@ class FragmentFD(FileDownloader):
|
||||
if not result:
|
||||
return False
|
||||
|
||||
if finish_func is not None:
|
||||
ctx['dest_stream'].write(finish_func())
|
||||
ctx['dest_stream'].flush()
|
||||
self._finish_frag_download(ctx, info_dict)
|
||||
return True
|
||||
|
||||
@@ -260,29 +260,35 @@ class HlsFD(FragmentFD):
|
||||
block.end += adjust
|
||||
|
||||
dedup_window = extra_state.setdefault('webvtt_dedup_window', [])
|
||||
cue = block.as_json
|
||||
|
||||
# skip the cue if an identical one appears
|
||||
# in the window of potential duplicates
|
||||
# and prune the window of unviable candidates
|
||||
ready = []
|
||||
|
||||
i = 0
|
||||
skip = True
|
||||
is_new = True
|
||||
while i < len(dedup_window):
|
||||
window_cue = dedup_window[i]
|
||||
if window_cue == cue:
|
||||
break
|
||||
if window_cue['end'] >= cue['start']:
|
||||
i += 1
|
||||
wcue = dedup_window[i]
|
||||
wblock = webvtt.CueBlock.from_json(wcue)
|
||||
i += 1
|
||||
if wblock.hinges(block):
|
||||
wcue['end'] = block.end
|
||||
is_new = False
|
||||
continue
|
||||
if wblock == block:
|
||||
is_new = False
|
||||
continue
|
||||
if wblock.end > block.start:
|
||||
continue
|
||||
ready.append(wblock)
|
||||
i -= 1
|
||||
del dedup_window[i]
|
||||
else:
|
||||
skip = False
|
||||
|
||||
if skip:
|
||||
continue
|
||||
if is_new:
|
||||
dedup_window.append(block.as_json)
|
||||
for block in ready:
|
||||
block.write_into(output)
|
||||
|
||||
# add the cue to the window
|
||||
dedup_window.append(cue)
|
||||
# we only emit cues once they fall out of the duplicate window
|
||||
continue
|
||||
elif isinstance(block, webvtt.Magic):
|
||||
# take care of MPEG PES timestamp overflow
|
||||
if block.mpegts is None:
|
||||
@@ -317,6 +323,19 @@ class HlsFD(FragmentFD):
|
||||
block.write_into(output)
|
||||
|
||||
return output.getvalue().encode('utf-8')
|
||||
|
||||
def fin_fragments():
|
||||
dedup_window = extra_state.get('webvtt_dedup_window')
|
||||
if not dedup_window:
|
||||
return b''
|
||||
|
||||
output = io.StringIO()
|
||||
for cue in dedup_window:
|
||||
webvtt.CueBlock.from_json(cue).write_into(output)
|
||||
|
||||
return output.getvalue().encode('utf-8')
|
||||
|
||||
self.download_and_append_fragments(
|
||||
ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
|
||||
else:
|
||||
pack_fragment = None
|
||||
return self.download_and_append_fragments(ctx, fragments, info_dict, pack_fragment)
|
||||
return self.download_and_append_fragments(ctx, fragments, info_dict)
|
||||
|
||||
@@ -76,6 +76,11 @@ MSO_INFO = {
|
||||
'username_field': 'IDToken1',
|
||||
'password_field': 'IDToken2',
|
||||
},
|
||||
'Cablevision': {
|
||||
'name': 'Optimum/Cablevision',
|
||||
'username_field': 'j_username',
|
||||
'password_field': 'j_password',
|
||||
},
|
||||
'thr030': {
|
||||
'name': '3 Rivers Communications'
|
||||
},
|
||||
@@ -1581,7 +1586,7 @@ class AdobePassIE(InfoExtractor):
|
||||
hidden_data['history'] = 1
|
||||
|
||||
provider_login_page_res = self._download_webpage_handle(
|
||||
urlh.geturl(), video_id, 'Sending first bookend.',
|
||||
urlh.geturl(), video_id, 'Sending first bookend',
|
||||
query=hidden_data)
|
||||
|
||||
provider_association_redirect, urlh = post_form(
|
||||
@@ -1600,7 +1605,7 @@ class AdobePassIE(InfoExtractor):
|
||||
hidden_data['history'] = 3
|
||||
|
||||
mvpd_confirm_page_res = self._download_webpage_handle(
|
||||
urlh.geturl(), video_id, 'Sending final bookend.',
|
||||
urlh.geturl(), video_id, 'Sending final bookend',
|
||||
query=hidden_data)
|
||||
|
||||
post_form(mvpd_confirm_page_res, 'Confirming Login')
|
||||
@@ -1616,10 +1621,13 @@ class AdobePassIE(InfoExtractor):
|
||||
'Downloading Provider Redirect Page (meta refresh)')
|
||||
provider_login_page_res = post_form(
|
||||
provider_redirect_page_res, self._DOWNLOADING_LOGIN_PAGE)
|
||||
mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', {
|
||||
form_data = {
|
||||
mso_info.get('username_field', 'username'): username,
|
||||
mso_info.get('password_field', 'password'): password,
|
||||
})
|
||||
mso_info.get('password_field', 'password'): password
|
||||
}
|
||||
if mso_id == 'Cablevision':
|
||||
form_data['_eventId_proceed'] = ''
|
||||
mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', form_data)
|
||||
if mso_id != 'Rogers':
|
||||
post_form(mvpd_confirm_page_res, 'Confirming Login')
|
||||
|
||||
|
||||
@@ -20,8 +20,8 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||
(?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
|
||||
fyi\.tv
|
||||
)/'''
|
||||
_THEPLATFORM_KEY = 'crazyjava'
|
||||
_THEPLATFORM_SECRET = 's3cr3t'
|
||||
_THEPLATFORM_KEY = '43jXaGRQud'
|
||||
_THEPLATFORM_SECRET = 'S10BPXHMlb'
|
||||
_DOMAIN_MAP = {
|
||||
'history.com': ('HISTORY', 'history'),
|
||||
'aetv.com': ('AETV', 'aetv'),
|
||||
|
||||
@@ -212,7 +212,7 @@ class BandcampIE(InfoExtractor):
|
||||
|
||||
class BandcampAlbumIE(BandcampIE):
|
||||
IE_NAME = 'Bandcamp:album'
|
||||
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<id>[^/?#&]+))?'
|
||||
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?!/music)(?:/album/(?P<id>[^/?#&]+))?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
||||
@@ -389,3 +389,43 @@ class BandcampWeeklyIE(BandcampIE):
|
||||
'episode_id': show_id,
|
||||
'formats': formats
|
||||
}
|
||||
|
||||
|
||||
class BandcampMusicIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?P<id>[^/]+)\.bandcamp\.com/music'
|
||||
_TESTS = [{
|
||||
'url': 'https://steviasphere.bandcamp.com/music',
|
||||
'playlist_mincount': 47,
|
||||
'info_dict': {
|
||||
'id': 'steviasphere',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://coldworldofficial.bandcamp.com/music',
|
||||
'playlist_mincount': 10,
|
||||
'info_dict': {
|
||||
'id': 'coldworldofficial',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://nuclearwarnowproductions.bandcamp.com/music',
|
||||
'playlist_mincount': 399,
|
||||
'info_dict': {
|
||||
'id': 'nuclearwarnowproductions',
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
_TYPE_IE_DICT = {
|
||||
'album': BandcampAlbumIE.ie_key(),
|
||||
'track': BandcampIE.ie_key()
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, id)
|
||||
items = re.findall(r'href\=\"\/(?P<path>(?P<type>album|track)+/[^\"]+)', webpage)
|
||||
entries = [
|
||||
self.url_result(
|
||||
f'https://{id}.bandcamp.com/{item[0]}',
|
||||
ie=self._TYPE_IE_DICT[item[1]])
|
||||
for item in items]
|
||||
return self.playlist_result(entries, id)
|
||||
|
||||
68
yt_dlp/extractor/blackboardcollaborate.py
Normal file
68
yt_dlp/extractor/blackboardcollaborate.py
Normal file
@@ -0,0 +1,68 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601
|
||||
|
||||
|
||||
class BlackboardCollaborateIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?P<region>[a-z-]+)\.bbcollab\.com/
|
||||
(?:
|
||||
collab/ui/session/playback/load|
|
||||
recording
|
||||
)/
|
||||
(?P<id>[^/]+)'''
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://us-lti.bbcollab.com/collab/ui/session/playback/load/0a633b6a88824deb8c918f470b22b256',
|
||||
'md5': 'bb7a055682ee4f25fdb5838cdf014541',
|
||||
'info_dict': {
|
||||
'id': '0a633b6a88824deb8c918f470b22b256',
|
||||
'title': 'HESI A2 Information Session - Thursday, May 6, 2021 - recording_1',
|
||||
'ext': 'mp4',
|
||||
'duration': 1896000,
|
||||
'timestamp': 1620331399,
|
||||
'upload_date': '20210506',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://us.bbcollab.com/collab/ui/session/playback/load/76761522adfe4345a0dee6794bbcabda',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://ca.bbcollab.com/collab/ui/session/playback/load/b6399dcb44df4f21b29ebe581e22479d',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://eu.bbcollab.com/recording/51ed7b50810c4444a106e48cefb3e6b5',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://au.bbcollab.com/collab/ui/session/playback/load/2bccf7165d7c419ab87afc1ec3f3bb15',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
region = mobj.group('region')
|
||||
video_id = mobj.group('id')
|
||||
info = self._download_json(
|
||||
'https://{}.bbcollab.com/collab/api/csa/recordings/{}/data'.format(region, video_id), video_id)
|
||||
duration = info.get('duration')
|
||||
title = info['name']
|
||||
upload_date = info.get('created')
|
||||
streams = info['streams']
|
||||
formats = [{'format_id': k, 'url': url} for k, url in streams.items()]
|
||||
|
||||
return {
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'id': video_id,
|
||||
'timestamp': parse_iso8601(upload_date),
|
||||
'title': title,
|
||||
}
|
||||
@@ -1,7 +1,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .gigya import GigyaBaseIE
|
||||
@@ -17,6 +16,7 @@ from ..utils import (
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
url_or_none,
|
||||
urlencode_postdata
|
||||
)
|
||||
|
||||
|
||||
@@ -265,7 +265,7 @@ class VrtNUIE(GigyaBaseIE):
|
||||
'expected_warnings': ['Unable to download asset JSON', 'is not a supported codec', 'Unknown MIME type'],
|
||||
}]
|
||||
_NETRC_MACHINE = 'vrtnu'
|
||||
_APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy'
|
||||
_APIKEY = '3_qhEcPa5JGFROVwu5SWKqJ4mVOIkwlFNMSKwzPDAh8QZOtHqu6L4nD5Q7lk0eXOOG'
|
||||
_CONTEXT_ID = 'R3595707040'
|
||||
|
||||
def _real_initialize(self):
|
||||
@@ -276,35 +276,38 @@ class VrtNUIE(GigyaBaseIE):
|
||||
if username is None:
|
||||
return
|
||||
|
||||
auth_data = {
|
||||
'APIKey': self._APIKEY,
|
||||
'targetEnv': 'jssdk',
|
||||
'loginID': username,
|
||||
'password': password,
|
||||
'authMode': 'cookie',
|
||||
}
|
||||
|
||||
auth_info = self._gigya_login(auth_data)
|
||||
auth_info = self._download_json(
|
||||
'https://accounts.vrt.be/accounts.login', None,
|
||||
note='Login data', errnote='Could not get Login data',
|
||||
headers={}, data=urlencode_postdata({
|
||||
'loginID': username,
|
||||
'password': password,
|
||||
'sessionExpiration': '-2',
|
||||
'APIKey': self._APIKEY,
|
||||
'targetEnv': 'jssdk',
|
||||
}))
|
||||
|
||||
# Sometimes authentication fails for no good reason, retry
|
||||
login_attempt = 1
|
||||
while login_attempt <= 3:
|
||||
try:
|
||||
# When requesting a token, no actual token is returned, but the
|
||||
# necessary cookies are set.
|
||||
self._request_webpage('https://token.vrt.be/vrtnuinitlogin',
|
||||
None, note='Requesting XSRF Token', errnote='Could not get XSRF Token',
|
||||
query={'provider': 'site', 'destination': 'https://www.vrt.be/vrtnu/'})
|
||||
|
||||
post_data = {
|
||||
'UID': auth_info['UID'],
|
||||
'UIDSignature': auth_info['UIDSignature'],
|
||||
'signatureTimestamp': auth_info['signatureTimestamp'],
|
||||
'client_id': 'vrtnu-site',
|
||||
'_csrf': self._get_cookies('https://login.vrt.be').get('OIDCXSRF').value,
|
||||
}
|
||||
|
||||
self._request_webpage(
|
||||
'https://token.vrt.be',
|
||||
'https://login.vrt.be/perform_login',
|
||||
None, note='Requesting a token', errnote='Could not get a token',
|
||||
headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Referer': 'https://www.vrt.be/vrtnu/',
|
||||
},
|
||||
data=json.dumps({
|
||||
'uid': auth_info['UID'],
|
||||
'uidsig': auth_info['UIDSignature'],
|
||||
'ts': auth_info['signatureTimestamp'],
|
||||
'email': auth_info['profile']['email'],
|
||||
}).encode('utf-8'))
|
||||
headers={}, data=urlencode_postdata(post_data))
|
||||
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||
login_attempt += 1
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .theplatform import ThePlatformFeedIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
@@ -9,6 +8,7 @@ from ..utils import (
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -26,16 +26,62 @@ class CBSBaseIE(ThePlatformFeedIE):
|
||||
})
|
||||
return subtitles
|
||||
|
||||
def _extract_common_video_info(self, content_id, asset_types, mpx_acc, extra_info):
|
||||
tp_path = 'dJ5BDC/media/guid/%d/%s' % (mpx_acc, content_id)
|
||||
tp_release_url = f'https://link.theplatform.com/s/{tp_path}'
|
||||
info = self._extract_theplatform_metadata(tp_path, content_id)
|
||||
|
||||
formats, subtitles = [], {}
|
||||
last_e = None
|
||||
for asset_type, query in asset_types.items():
|
||||
try:
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
update_url_query(tp_release_url, query), content_id,
|
||||
'Downloading %s SMIL data' % asset_type)
|
||||
except ExtractorError as e:
|
||||
last_e = e
|
||||
if asset_type != 'fallback':
|
||||
continue
|
||||
query['formats'] = '' # blank query to check if expired
|
||||
try:
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
update_url_query(tp_release_url, query), content_id,
|
||||
'Downloading %s SMIL data, trying again with another format' % asset_type)
|
||||
except ExtractorError as e:
|
||||
last_e = e
|
||||
continue
|
||||
formats.extend(tp_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
if last_e and not formats:
|
||||
self.raise_no_formats(last_e, True, content_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
extra_info.update({
|
||||
'id': content_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
info.update({k: v for k, v in extra_info.items() if v is not None})
|
||||
return info
|
||||
|
||||
def _extract_video_info(self, *args, **kwargs):
|
||||
# Extract assets + metadata and call _extract_common_video_info
|
||||
raise NotImplementedError('This method must be implemented by subclasses')
|
||||
|
||||
def _real_extract(self, url):
|
||||
return self._extract_video_info(self._match_id(url))
|
||||
|
||||
|
||||
class CBSIE(CBSBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
cbs:|
|
||||
https?://(?:www\.)?(?:
|
||||
(?:cbs|paramountplus)\.com/(?:shows/[^/]+/video|movies/[^/]+)/|
|
||||
cbs\.com/(?:shows/[^/]+/video|movies/[^/]+)/|
|
||||
colbertlateshow\.com/(?:video|podcasts)/)
|
||||
)(?P<id>[\w-]+)'''
|
||||
|
||||
# All tests are blocked outside US
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
|
||||
'info_dict': {
|
||||
@@ -52,39 +98,6 @@ class CBSIE(CBSBaseIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'_skip': 'Blocked outside the US',
|
||||
}, {
|
||||
'url': 'https://www.paramountplus.com/shows/catdog/video/Oe44g5_NrlgiZE3aQVONleD6vXc8kP0k/catdog-climb-every-catdog-the-canine-mutiny/',
|
||||
'info_dict': {
|
||||
'id': 'Oe44g5_NrlgiZE3aQVONleD6vXc8kP0k',
|
||||
'ext': 'mp4',
|
||||
'title': 'CatDog - Climb Every CatDog/The Canine Mutiny',
|
||||
'description': 'md5:7ac835000645a69933df226940e3c859',
|
||||
'duration': 1418,
|
||||
'timestamp': 920264400,
|
||||
'upload_date': '19990301',
|
||||
'uploader': 'CBSI-NEW',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
'_skip': 'Blocked outside the US',
|
||||
}, {
|
||||
'url': 'https://www.paramountplus.com/shows/tooning-out-the-news/video/6hSWYWRrR9EUTz7IEe5fJKBhYvSUfexd/7-23-21-week-in-review-rep-jahana-hayes-howard-fineman-sen-michael-bennet-sheera-frenkel-cecilia-kang-/',
|
||||
'info_dict': {
|
||||
'id': '6hSWYWRrR9EUTz7IEe5fJKBhYvSUfexd',
|
||||
'ext': 'mp4',
|
||||
'title': '7/23/21 WEEK IN REVIEW (Rep. Jahana Hayes/Howard Fineman/Sen. Michael Bennet/Sheera Frenkel & Cecilia Kang)',
|
||||
'description': 'md5:f4adcea3e8b106192022e121f1565bae',
|
||||
'duration': 2506,
|
||||
'timestamp': 1627063200,
|
||||
'upload_date': '20210723',
|
||||
'uploader': 'CBSI-NEW',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
'_skip': 'Blocked outside the US',
|
||||
}, {
|
||||
'url': 'https://www.cbs.com/shows/the-late-show-with-stephen-colbert/video/60icOhMb9NcjbcWnF_gub9XXHdeBcNk2/the-late-show-6-23-21-christine-baranski-joy-oladokun-',
|
||||
'info_dict': {
|
||||
@@ -107,12 +120,6 @@ class CBSIE(CBSBaseIE):
|
||||
}, {
|
||||
'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.paramountplus.com/shows/all-rise/video/QmR1WhNkh1a_IrdHZrbcRklm176X_rVc/all-rise-space/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.paramountplus.com/movies/million-dollar-american-princesses-meghan-and-harry/C0LpgNwXYeB8txxycdWdR9TjxpJOsdCq',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517):
|
||||
@@ -121,14 +128,8 @@ class CBSIE(CBSBaseIE):
|
||||
content_id, query={'partner': site, 'contentId': content_id})
|
||||
video_data = xpath_element(items_data, './/item')
|
||||
title = xpath_text(video_data, 'videoTitle', 'title') or xpath_text(video_data, 'videotitle', 'title')
|
||||
tp_path = 'dJ5BDC/media/guid/%d/%s' % (mpx_acc, content_id)
|
||||
tp_release_url = 'https://link.theplatform.com/s/' + tp_path
|
||||
|
||||
asset_types = []
|
||||
subtitles = {}
|
||||
formats = []
|
||||
useXMLmetadata = True
|
||||
last_e = None
|
||||
asset_types = {}
|
||||
for item in items_data.findall('.//item'):
|
||||
asset_type = xpath_text(item, 'assetType')
|
||||
query = {
|
||||
@@ -137,94 +138,24 @@ class CBSIE(CBSBaseIE):
|
||||
}
|
||||
if not asset_type:
|
||||
# fallback for content_ids that videoPlayerService doesn't return anything for
|
||||
useXMLmetadata = False
|
||||
asset_type = 'fallback'
|
||||
query['formats'] = 'M3U+none,MPEG4,M3U+appleHlsEncryption,MP3'
|
||||
del query['assetTypes']
|
||||
elif asset_type in asset_types:
|
||||
if asset_type in asset_types:
|
||||
continue
|
||||
elif any(excluded in asset_type for excluded in ('HLS_FPS', 'DASH_CENC', 'OnceURL')):
|
||||
continue
|
||||
asset_types.append(asset_type)
|
||||
if asset_type.startswith('HLS') or 'StreamPack' in asset_type:
|
||||
query['formats'] = 'MPEG4,M3U'
|
||||
elif asset_type in ('RTMP', 'WIFI', '3G'):
|
||||
query['formats'] = 'MPEG4,FLV'
|
||||
try:
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
update_url_query(tp_release_url, query), content_id,
|
||||
'Downloading %s SMIL data' % asset_type)
|
||||
except ExtractorError as e:
|
||||
last_e = e
|
||||
if useXMLmetadata:
|
||||
continue
|
||||
query['formats'] = '' # blank query to check if expired
|
||||
try:
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
update_url_query(tp_release_url, query), content_id,
|
||||
'Downloading %s SMIL data, trying again with another format' % asset_type)
|
||||
except ExtractorError as e:
|
||||
last_e = e
|
||||
continue
|
||||
formats.extend(tp_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
if last_e and not formats:
|
||||
self.raise_no_formats(last_e, True, content_id)
|
||||
self._sort_formats(formats)
|
||||
asset_types[asset_type] = query
|
||||
|
||||
info = self._extract_theplatform_metadata(tp_path, content_id)
|
||||
info.update({
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'id': content_id
|
||||
return self._extract_common_video_info(content_id, asset_types, mpx_acc, extra_info={
|
||||
'title': title,
|
||||
'series': xpath_text(video_data, 'seriesTitle'),
|
||||
'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
|
||||
'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
|
||||
'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000),
|
||||
'thumbnail': url_or_none(xpath_text(video_data, 'previewImageURL')),
|
||||
})
|
||||
if useXMLmetadata:
|
||||
info.update({
|
||||
'title': title,
|
||||
'series': xpath_text(video_data, 'seriesTitle'),
|
||||
'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
|
||||
'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
|
||||
'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000),
|
||||
'thumbnail': xpath_text(video_data, 'previewImageURL')
|
||||
})
|
||||
return info
|
||||
|
||||
def _real_extract(self, url):
|
||||
content_id = self._match_id(url)
|
||||
return self._extract_video_info(content_id)
|
||||
|
||||
|
||||
class ParamountPlusSeriesIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?paramountplus\.com/shows/(?P<id>[a-zA-Z0-9-_]+)/?(?:[#?]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.paramountplus.com/shows/drake-josh',
|
||||
'playlist_mincount': 50,
|
||||
'info_dict': {
|
||||
'id': 'drake-josh',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.paramountplus.com/shows/hawaii_five_0/',
|
||||
'playlist_mincount': 240,
|
||||
'info_dict': {
|
||||
'id': 'hawaii_five_0',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.paramountplus.com/shows/spongebob-squarepants/',
|
||||
'playlist_mincount': 248,
|
||||
'info_dict': {
|
||||
'id': 'spongebob-squarepants',
|
||||
}
|
||||
}]
|
||||
_API_URL = 'https://www.paramountplus.com/shows/{}/xhr/episodes/page/0/size/100000/xs/0/season/0/'
|
||||
|
||||
def _entries(self, show_name):
|
||||
show_json = self._download_json(self._API_URL.format(show_name), video_id=show_name)
|
||||
if show_json.get('success'):
|
||||
for episode in show_json['result']['data']:
|
||||
yield self.url_result(
|
||||
'https://www.paramountplus.com%s' % episode['url'],
|
||||
ie=CBSIE.ie_key(), video_id=episode['content_id'])
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_name = self._match_id(url)
|
||||
return self.playlist_result(self._entries(show_name), playlist_id=show_name)
|
||||
|
||||
@@ -35,7 +35,6 @@ from ..downloader.f4m import (
|
||||
remove_encrypted_media,
|
||||
)
|
||||
from ..utils import (
|
||||
NO_DEFAULT,
|
||||
age_restricted,
|
||||
base_url,
|
||||
bug_reports_message,
|
||||
@@ -45,10 +44,11 @@ from ..utils import (
|
||||
determine_protocol,
|
||||
dict_get,
|
||||
error_to_compat_str,
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
fix_xml_ampersands,
|
||||
float_or_none,
|
||||
format_field,
|
||||
GeoRestrictedError,
|
||||
GeoUtils,
|
||||
int_or_none,
|
||||
@@ -56,6 +56,7 @@ from ..utils import (
|
||||
JSON_LD_RE,
|
||||
mimetype2ext,
|
||||
network_exceptions,
|
||||
NO_DEFAULT,
|
||||
orderedSet,
|
||||
parse_bitrate,
|
||||
parse_codecs,
|
||||
@@ -64,8 +65,8 @@ from ..utils import (
|
||||
parse_m3u8_attributes,
|
||||
parse_resolution,
|
||||
RegexNotFoundError,
|
||||
sanitized_Request,
|
||||
sanitize_filename,
|
||||
sanitized_Request,
|
||||
str_or_none,
|
||||
str_to_int,
|
||||
strip_or_none,
|
||||
@@ -75,9 +76,9 @@ from ..utils import (
|
||||
unified_timestamp,
|
||||
update_Request,
|
||||
update_url_query,
|
||||
urljoin,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
variadic,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
@@ -297,7 +298,7 @@ class InfoExtractor(object):
|
||||
live stream that goes on instead of a fixed-length video.
|
||||
was_live: True, False, or None (=unknown). Whether this video was
|
||||
originally a live stream.
|
||||
live_status: 'is_live', 'upcoming', 'was_live', 'not_live' or None (=unknown)
|
||||
live_status: 'is_live', 'is_upcoming', 'was_live', 'not_live' or None (=unknown)
|
||||
If absent, automatically set from is_live, was_live
|
||||
start_time: Time in seconds where the reproduction should start, as
|
||||
specified in the URL.
|
||||
@@ -442,6 +443,7 @@ class InfoExtractor(object):
|
||||
"""Constructor. Receives an optional downloader."""
|
||||
self._ready = False
|
||||
self._x_forwarded_for_ip = None
|
||||
self._printed_messages = set()
|
||||
self.set_downloader(downloader)
|
||||
|
||||
@classmethod
|
||||
@@ -470,6 +472,7 @@ class InfoExtractor(object):
|
||||
|
||||
def initialize(self):
|
||||
"""Initializes an instance (authentication, etc)."""
|
||||
self._printed_messages = set()
|
||||
self._initialize_geo_bypass({
|
||||
'countries': self._GEO_COUNTRIES,
|
||||
'ip_blocks': self._GEO_IP_BLOCKS,
|
||||
@@ -999,10 +1002,14 @@ class InfoExtractor(object):
|
||||
expected_status=expected_status)
|
||||
return res if res is False else res[0]
|
||||
|
||||
def report_warning(self, msg, video_id=None, *args, **kwargs):
|
||||
idstr = '' if video_id is None else '%s: ' % video_id
|
||||
self._downloader.report_warning(
|
||||
'[%s] %s%s' % (self.IE_NAME, idstr, msg), *args, **kwargs)
|
||||
def report_warning(self, msg, video_id=None, *args, only_once=False, **kwargs):
|
||||
idstr = format_field(video_id, template='%s: ')
|
||||
msg = f'[{self.IE_NAME}] {idstr}{msg}'
|
||||
if only_once:
|
||||
if f'WARNING: {msg}' in self._printed_messages:
|
||||
return
|
||||
self._printed_messages.add(f'WARNING: {msg}')
|
||||
self._downloader.report_warning(msg, *args, **kwargs)
|
||||
|
||||
def to_screen(self, msg, *args, **kwargs):
|
||||
"""Print msg to screen, prefixing it with '[ie_name]'"""
|
||||
@@ -1947,7 +1954,7 @@ class InfoExtractor(object):
|
||||
self.report_warning(bug_reports_message(
|
||||
"Ignoring subtitle tracks found in the HLS manifest; "
|
||||
"if any subtitle tracks are missing,"
|
||||
))
|
||||
), only_once=True)
|
||||
return fmts
|
||||
|
||||
def _extract_m3u8_formats_and_subtitles(
|
||||
@@ -2230,7 +2237,7 @@ class InfoExtractor(object):
|
||||
self.report_warning(bug_reports_message(
|
||||
"Ignoring subtitle tracks found in the SMIL manifest; "
|
||||
"if any subtitle tracks are missing,"
|
||||
))
|
||||
), only_once=True)
|
||||
return fmts
|
||||
|
||||
def _extract_smil_info(self, smil_url, video_id, fatal=True, f4m_params=None):
|
||||
@@ -2456,7 +2463,7 @@ class InfoExtractor(object):
|
||||
self.report_warning(bug_reports_message(
|
||||
"Ignoring subtitle tracks found in the DASH manifest; "
|
||||
"if any subtitle tracks are missing,"
|
||||
))
|
||||
), only_once=True)
|
||||
return fmts
|
||||
|
||||
def _extract_mpd_formats_and_subtitles(
|
||||
@@ -2483,7 +2490,7 @@ class InfoExtractor(object):
|
||||
self.report_warning(bug_reports_message(
|
||||
"Ignoring subtitle tracks found in the DASH manifest; "
|
||||
"if any subtitle tracks are missing,"
|
||||
))
|
||||
), only_once=True)
|
||||
return fmts
|
||||
|
||||
def _parse_mpd_formats_and_subtitles(
|
||||
@@ -2589,215 +2596,223 @@ class InfoExtractor(object):
|
||||
mime_type = representation_attrib['mimeType']
|
||||
content_type = representation_attrib.get('contentType', mime_type.split('/')[0])
|
||||
|
||||
if content_type in ('video', 'audio', 'text') or mime_type == 'image/jpeg':
|
||||
base_url = ''
|
||||
for element in (representation, adaptation_set, period, mpd_doc):
|
||||
base_url_e = element.find(_add_ns('BaseURL'))
|
||||
if base_url_e is not None:
|
||||
base_url = base_url_e.text + base_url
|
||||
if re.match(r'^https?://', base_url):
|
||||
break
|
||||
if mpd_base_url and not re.match(r'^https?://', base_url):
|
||||
if not mpd_base_url.endswith('/') and not base_url.startswith('/'):
|
||||
mpd_base_url += '/'
|
||||
base_url = mpd_base_url + base_url
|
||||
representation_id = representation_attrib.get('id')
|
||||
lang = representation_attrib.get('lang')
|
||||
url_el = representation.find(_add_ns('BaseURL'))
|
||||
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
|
||||
bandwidth = int_or_none(representation_attrib.get('bandwidth'))
|
||||
if representation_id is not None:
|
||||
format_id = representation_id
|
||||
codecs = representation_attrib.get('codecs', '')
|
||||
if content_type not in ('video', 'audio', 'text'):
|
||||
if mime_type == 'image/jpeg':
|
||||
content_type = 'image/jpeg'
|
||||
if codecs.split('.')[0] == 'stpp':
|
||||
content_type = 'text'
|
||||
else:
|
||||
format_id = content_type
|
||||
if mpd_id:
|
||||
format_id = mpd_id + '-' + format_id
|
||||
if content_type in ('video', 'audio'):
|
||||
f = {
|
||||
'format_id': format_id,
|
||||
'manifest_url': mpd_url,
|
||||
'ext': mimetype2ext(mime_type),
|
||||
'width': int_or_none(representation_attrib.get('width')),
|
||||
'height': int_or_none(representation_attrib.get('height')),
|
||||
'tbr': float_or_none(bandwidth, 1000),
|
||||
'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
|
||||
'fps': int_or_none(representation_attrib.get('frameRate')),
|
||||
'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
|
||||
'format_note': 'DASH %s' % content_type,
|
||||
'filesize': filesize,
|
||||
'container': mimetype2ext(mime_type) + '_dash',
|
||||
}
|
||||
f.update(parse_codecs(representation_attrib.get('codecs')))
|
||||
elif content_type == 'text':
|
||||
f = {
|
||||
'ext': mimetype2ext(mime_type),
|
||||
'manifest_url': mpd_url,
|
||||
'filesize': filesize,
|
||||
}
|
||||
elif mime_type == 'image/jpeg':
|
||||
# See test case in VikiIE
|
||||
# https://www.viki.com/videos/1175236v-choosing-spouse-by-lottery-episode-1
|
||||
f = {
|
||||
'format_id': format_id,
|
||||
'ext': 'mhtml',
|
||||
'manifest_url': mpd_url,
|
||||
'format_note': 'DASH storyboards (jpeg)',
|
||||
'acodec': 'none',
|
||||
'vcodec': 'none',
|
||||
}
|
||||
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
|
||||
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
||||
continue
|
||||
|
||||
def prepare_template(template_name, identifiers):
|
||||
tmpl = representation_ms_info[template_name]
|
||||
# First of, % characters outside $...$ templates
|
||||
# must be escaped by doubling for proper processing
|
||||
# by % operator string formatting used further (see
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/16867).
|
||||
t = ''
|
||||
in_template = False
|
||||
for c in tmpl:
|
||||
base_url = ''
|
||||
for element in (representation, adaptation_set, period, mpd_doc):
|
||||
base_url_e = element.find(_add_ns('BaseURL'))
|
||||
if base_url_e is not None:
|
||||
base_url = base_url_e.text + base_url
|
||||
if re.match(r'^https?://', base_url):
|
||||
break
|
||||
if mpd_base_url and not re.match(r'^https?://', base_url):
|
||||
if not mpd_base_url.endswith('/') and not base_url.startswith('/'):
|
||||
mpd_base_url += '/'
|
||||
base_url = mpd_base_url + base_url
|
||||
representation_id = representation_attrib.get('id')
|
||||
lang = representation_attrib.get('lang')
|
||||
url_el = representation.find(_add_ns('BaseURL'))
|
||||
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
|
||||
bandwidth = int_or_none(representation_attrib.get('bandwidth'))
|
||||
if representation_id is not None:
|
||||
format_id = representation_id
|
||||
else:
|
||||
format_id = content_type
|
||||
if mpd_id:
|
||||
format_id = mpd_id + '-' + format_id
|
||||
if content_type in ('video', 'audio'):
|
||||
f = {
|
||||
'format_id': format_id,
|
||||
'manifest_url': mpd_url,
|
||||
'ext': mimetype2ext(mime_type),
|
||||
'width': int_or_none(representation_attrib.get('width')),
|
||||
'height': int_or_none(representation_attrib.get('height')),
|
||||
'tbr': float_or_none(bandwidth, 1000),
|
||||
'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
|
||||
'fps': int_or_none(representation_attrib.get('frameRate')),
|
||||
'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
|
||||
'format_note': 'DASH %s' % content_type,
|
||||
'filesize': filesize,
|
||||
'container': mimetype2ext(mime_type) + '_dash',
|
||||
}
|
||||
f.update(parse_codecs(codecs))
|
||||
elif content_type == 'text':
|
||||
f = {
|
||||
'ext': mimetype2ext(mime_type),
|
||||
'manifest_url': mpd_url,
|
||||
'filesize': filesize,
|
||||
}
|
||||
elif content_type == 'image/jpeg':
|
||||
# See test case in VikiIE
|
||||
# https://www.viki.com/videos/1175236v-choosing-spouse-by-lottery-episode-1
|
||||
f = {
|
||||
'format_id': format_id,
|
||||
'ext': 'mhtml',
|
||||
'manifest_url': mpd_url,
|
||||
'format_note': 'DASH storyboards (jpeg)',
|
||||
'acodec': 'none',
|
||||
'vcodec': 'none',
|
||||
}
|
||||
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
|
||||
|
||||
def prepare_template(template_name, identifiers):
    """Convert a DASH ``$...$`` URL template into a ``%``-format string.

    Reads the raw template from ``representation_ms_info[template_name]``
    (a variable of the enclosing scope) and returns a string meant to be
    used with the ``%`` operator and a dict keyed by the given identifiers.

    template_name -- key of the template in ``representation_ms_info``
    identifiers   -- iterable of identifier names (e.g. 'Number', 'Time',
                     'Bandwidth') that are translated to ``%(name)...``
    """
    tmpl = representation_ms_info[template_name]
    # First off, % characters outside $...$ templates
    # must be escaped by doubling for proper processing
    # by % operator string formatting used further (see
    # https://github.com/ytdl-org/youtube-dl/issues/16867).
    escaped = []
    in_template = False
    for c in tmpl:
        escaped.append(c)
        if c == '$':
            # Every '$' toggles whether we are inside a $...$ template
            in_template = not in_template
        elif c == '%' and not in_template:
            escaped.append(c)
    t = ''.join(escaped)
    # Next, $...$ templates are translated to their
    # %(...) counterparts to be used with % operator
    if representation_id is not None:
        t = t.replace('$RepresentationID$', representation_id)
    alternatives = '|'.join(identifiers)
    # Bare identifier: $Number$ -> %(Number)d
    t = re.sub(r'\$(%s)\$' % alternatives, r'%(\1)d', t)
    # Identifier with an explicit format tag: $Number%05d$ -> %(Number)05d
    t = re.sub(r'\$(%s)%%([^$]+)\$' % alternatives, r'%(\1)\2', t)
    # Collapse the '$$' escape into a single '$'. str.replace returns a new
    # string, so the result has to be rebound for it to take effect.
    t = t.replace('$$', '$')
    return t
|
||||
|
||||
# @initialization is a regular template like @media one
|
||||
# so it should be handled just the same way (see
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/11605)
|
||||
if 'initialization' in representation_ms_info:
|
||||
initialization_template = prepare_template(
|
||||
'initialization',
|
||||
# As per [1, 5.3.9.4.2, Table 15, page 54] $Number$ and
|
||||
# $Time$ shall not be included for @initialization thus
|
||||
# only $Bandwidth$ remains
|
||||
('Bandwidth', ))
|
||||
representation_ms_info['initialization_url'] = initialization_template % {
|
||||
'Bandwidth': bandwidth,
|
||||
}
|
||||
# @initialization is a regular template like @media one
|
||||
# so it should be handled just the same way (see
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/11605)
|
||||
if 'initialization' in representation_ms_info:
|
||||
initialization_template = prepare_template(
|
||||
'initialization',
|
||||
# As per [1, 5.3.9.4.2, Table 15, page 54] $Number$ and
|
||||
# $Time$ shall not be included for @initialization thus
|
||||
# only $Bandwidth$ remains
|
||||
('Bandwidth', ))
|
||||
representation_ms_info['initialization_url'] = initialization_template % {
|
||||
'Bandwidth': bandwidth,
|
||||
}
|
||||
|
||||
def location_key(location):
|
||||
return 'url' if re.match(r'^https?://', location) else 'path'
|
||||
def location_key(location):
|
||||
return 'url' if re.match(r'^https?://', location) else 'path'
|
||||
|
||||
if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
|
||||
if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
|
||||
|
||||
media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
|
||||
media_location_key = location_key(media_template)
|
||||
media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
|
||||
media_location_key = location_key(media_template)
|
||||
|
||||
# As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
|
||||
# can't be used at the same time
|
||||
if '%(Number' in media_template and 's' not in representation_ms_info:
|
||||
segment_duration = None
|
||||
if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
|
||||
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
|
||||
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
|
||||
representation_ms_info['fragments'] = [{
|
||||
media_location_key: media_template % {
|
||||
'Number': segment_number,
|
||||
'Bandwidth': bandwidth,
|
||||
},
|
||||
'duration': segment_duration,
|
||||
} for segment_number in range(
|
||||
representation_ms_info['start_number'],
|
||||
representation_ms_info['total_number'] + representation_ms_info['start_number'])]
|
||||
else:
|
||||
# $Number*$ or $Time$ in media template with S list available
|
||||
# Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg
|
||||
# Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
|
||||
representation_ms_info['fragments'] = []
|
||||
segment_time = 0
|
||||
segment_d = None
|
||||
segment_number = representation_ms_info['start_number']
|
||||
# As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
|
||||
# can't be used at the same time
|
||||
if '%(Number' in media_template and 's' not in representation_ms_info:
|
||||
segment_duration = None
|
||||
if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
|
||||
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
|
||||
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
|
||||
representation_ms_info['fragments'] = [{
|
||||
media_location_key: media_template % {
|
||||
'Number': segment_number,
|
||||
'Bandwidth': bandwidth,
|
||||
},
|
||||
'duration': segment_duration,
|
||||
} for segment_number in range(
|
||||
representation_ms_info['start_number'],
|
||||
representation_ms_info['total_number'] + representation_ms_info['start_number'])]
|
||||
else:
|
||||
# $Number*$ or $Time$ in media template with S list available
|
||||
# Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg
|
||||
# Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
|
||||
representation_ms_info['fragments'] = []
|
||||
segment_time = 0
|
||||
segment_d = None
|
||||
segment_number = representation_ms_info['start_number']
|
||||
|
||||
def add_segment_url():
|
||||
segment_url = media_template % {
|
||||
'Time': segment_time,
|
||||
'Bandwidth': bandwidth,
|
||||
'Number': segment_number,
|
||||
}
|
||||
representation_ms_info['fragments'].append({
|
||||
media_location_key: segment_url,
|
||||
'duration': float_or_none(segment_d, representation_ms_info['timescale']),
|
||||
})
|
||||
def add_segment_url():
|
||||
segment_url = media_template % {
|
||||
'Time': segment_time,
|
||||
'Bandwidth': bandwidth,
|
||||
'Number': segment_number,
|
||||
}
|
||||
representation_ms_info['fragments'].append({
|
||||
media_location_key: segment_url,
|
||||
'duration': float_or_none(segment_d, representation_ms_info['timescale']),
|
||||
})
|
||||
|
||||
for num, s in enumerate(representation_ms_info['s']):
|
||||
segment_time = s.get('t') or segment_time
|
||||
segment_d = s['d']
|
||||
for num, s in enumerate(representation_ms_info['s']):
|
||||
segment_time = s.get('t') or segment_time
|
||||
segment_d = s['d']
|
||||
add_segment_url()
|
||||
segment_number += 1
|
||||
for r in range(s.get('r', 0)):
|
||||
segment_time += segment_d
|
||||
add_segment_url()
|
||||
segment_number += 1
|
||||
for r in range(s.get('r', 0)):
|
||||
segment_time += segment_d
|
||||
add_segment_url()
|
||||
segment_number += 1
|
||||
segment_time += segment_d
|
||||
elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info:
|
||||
# No media template
|
||||
# Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
|
||||
# or any YouTube dashsegments video
|
||||
fragments = []
|
||||
segment_index = 0
|
||||
timescale = representation_ms_info['timescale']
|
||||
for s in representation_ms_info['s']:
|
||||
duration = float_or_none(s['d'], timescale)
|
||||
for r in range(s.get('r', 0) + 1):
|
||||
segment_uri = representation_ms_info['segment_urls'][segment_index]
|
||||
fragments.append({
|
||||
location_key(segment_uri): segment_uri,
|
||||
'duration': duration,
|
||||
})
|
||||
segment_index += 1
|
||||
representation_ms_info['fragments'] = fragments
|
||||
elif 'segment_urls' in representation_ms_info:
|
||||
# Segment URLs with no SegmentTimeline
|
||||
# Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
|
||||
# https://github.com/ytdl-org/youtube-dl/pull/14844
|
||||
fragments = []
|
||||
segment_duration = float_or_none(
|
||||
representation_ms_info['segment_duration'],
|
||||
representation_ms_info['timescale']) if 'segment_duration' in representation_ms_info else None
|
||||
for segment_url in representation_ms_info['segment_urls']:
|
||||
fragment = {
|
||||
location_key(segment_url): segment_url,
|
||||
}
|
||||
if segment_duration:
|
||||
fragment['duration'] = segment_duration
|
||||
fragments.append(fragment)
|
||||
representation_ms_info['fragments'] = fragments
|
||||
# If there is a fragments key available then we correctly recognized fragmented media.
|
||||
# Otherwise we will assume unfragmented media with direct access. Technically, such
|
||||
# assumption is not necessarily correct since we may simply have no support for
|
||||
# some forms of fragmented media renditions yet, but for now we'll use this fallback.
|
||||
if 'fragments' in representation_ms_info:
|
||||
f.update({
|
||||
# NB: mpd_url may be empty when MPD manifest is parsed from a string
|
||||
'url': mpd_url or base_url,
|
||||
'fragment_base_url': base_url,
|
||||
'fragments': [],
|
||||
'protocol': 'http_dash_segments' if mime_type != 'image/jpeg' else 'mhtml',
|
||||
})
|
||||
if 'initialization_url' in representation_ms_info:
|
||||
initialization_url = representation_ms_info['initialization_url']
|
||||
if not f.get('url'):
|
||||
f['url'] = initialization_url
|
||||
f['fragments'].append({location_key(initialization_url): initialization_url})
|
||||
f['fragments'].extend(representation_ms_info['fragments'])
|
||||
else:
|
||||
# Assuming direct URL to unfragmented media.
|
||||
f['url'] = base_url
|
||||
if content_type in ('video', 'audio') or mime_type == 'image/jpeg':
|
||||
formats.append(f)
|
||||
elif content_type == 'text':
|
||||
subtitles.setdefault(lang or 'und', []).append(f)
|
||||
segment_time += segment_d
|
||||
elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info:
|
||||
# No media template
|
||||
# Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
|
||||
# or any YouTube dashsegments video
|
||||
fragments = []
|
||||
segment_index = 0
|
||||
timescale = representation_ms_info['timescale']
|
||||
for s in representation_ms_info['s']:
|
||||
duration = float_or_none(s['d'], timescale)
|
||||
for r in range(s.get('r', 0) + 1):
|
||||
segment_uri = representation_ms_info['segment_urls'][segment_index]
|
||||
fragments.append({
|
||||
location_key(segment_uri): segment_uri,
|
||||
'duration': duration,
|
||||
})
|
||||
segment_index += 1
|
||||
representation_ms_info['fragments'] = fragments
|
||||
elif 'segment_urls' in representation_ms_info:
|
||||
# Segment URLs with no SegmentTimeline
|
||||
# Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
|
||||
# https://github.com/ytdl-org/youtube-dl/pull/14844
|
||||
fragments = []
|
||||
segment_duration = float_or_none(
|
||||
representation_ms_info['segment_duration'],
|
||||
representation_ms_info['timescale']) if 'segment_duration' in representation_ms_info else None
|
||||
for segment_url in representation_ms_info['segment_urls']:
|
||||
fragment = {
|
||||
location_key(segment_url): segment_url,
|
||||
}
|
||||
if segment_duration:
|
||||
fragment['duration'] = segment_duration
|
||||
fragments.append(fragment)
|
||||
representation_ms_info['fragments'] = fragments
|
||||
# If there is a fragments key available then we correctly recognized fragmented media.
|
||||
# Otherwise we will assume unfragmented media with direct access. Technically, such
|
||||
# assumption is not necessarily correct since we may simply have no support for
|
||||
# some forms of fragmented media renditions yet, but for now we'll use this fallback.
|
||||
if 'fragments' in representation_ms_info:
|
||||
f.update({
|
||||
# NB: mpd_url may be empty when MPD manifest is parsed from a string
|
||||
'url': mpd_url or base_url,
|
||||
'fragment_base_url': base_url,
|
||||
'fragments': [],
|
||||
'protocol': 'http_dash_segments' if mime_type != 'image/jpeg' else 'mhtml',
|
||||
})
|
||||
if 'initialization_url' in representation_ms_info:
|
||||
initialization_url = representation_ms_info['initialization_url']
|
||||
if not f.get('url'):
|
||||
f['url'] = initialization_url
|
||||
f['fragments'].append({location_key(initialization_url): initialization_url})
|
||||
f['fragments'].extend(representation_ms_info['fragments'])
|
||||
else:
|
||||
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
||||
# Assuming direct URL to unfragmented media.
|
||||
f['url'] = base_url
|
||||
if content_type in ('video', 'audio') or mime_type == 'image/jpeg':
|
||||
formats.append(f)
|
||||
elif content_type == 'text':
|
||||
subtitles.setdefault(lang or 'und', []).append(f)
|
||||
|
||||
return formats, subtitles
|
||||
|
||||
def _extract_ism_formats(self, *args, **kwargs):
|
||||
|
||||
@@ -29,6 +29,7 @@ from ..utils import (
|
||||
merge_dicts,
|
||||
remove_end,
|
||||
sanitized_Request,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
)
|
||||
@@ -458,6 +459,18 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
video_description = (self._parse_json(self._html_search_regex(
|
||||
r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
|
||||
webpage, 'description', default='{}'), video_id) or media_metadata).get('description')
|
||||
|
||||
thumbnails = []
|
||||
thumbnail_url = (self._parse_json(self._html_search_regex(
|
||||
r'<script type="application\/ld\+json">\n\s*(.+?)<\/script>',
|
||||
webpage, 'thumbnail_url', default='{}'), video_id)).get('image')
|
||||
if thumbnail_url:
|
||||
thumbnails.append({
|
||||
'url': thumbnail_url,
|
||||
'width': 1920,
|
||||
'height': 1080
|
||||
})
|
||||
|
||||
if video_description:
|
||||
video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
|
||||
video_uploader = self._html_search_regex(
|
||||
@@ -592,21 +605,25 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d',
|
||||
webpage, 'series', fatal=False)
|
||||
|
||||
season = episode = episode_number = duration = thumbnail = None
|
||||
season = episode = episode_number = duration = None
|
||||
|
||||
if isinstance(metadata, compat_etree_Element):
|
||||
season = xpath_text(metadata, 'series_title')
|
||||
episode = xpath_text(metadata, 'episode_title')
|
||||
episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
|
||||
duration = float_or_none(media_metadata.get('duration'), 1000)
|
||||
thumbnail = xpath_text(metadata, 'episode_image_url')
|
||||
|
||||
if not episode:
|
||||
episode = media_metadata.get('title')
|
||||
if not episode_number:
|
||||
episode_number = int_or_none(media_metadata.get('episode_number'))
|
||||
if not thumbnail:
|
||||
thumbnail = media_metadata.get('thumbnail', {}).get('url')
|
||||
thumbnail_url = try_get(media, lambda x: x['thumbnail']['url'])
|
||||
if thumbnail_url:
|
||||
thumbnails.append({
|
||||
'url': thumbnail_url,
|
||||
'width': 640,
|
||||
'height': 360
|
||||
})
|
||||
|
||||
season_number = int_or_none(self._search_regex(
|
||||
r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
|
||||
@@ -619,7 +636,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
'title': video_title,
|
||||
'description': video_description,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
'thumbnails': thumbnails,
|
||||
'uploader': video_uploader,
|
||||
'series': series,
|
||||
'season': season,
|
||||
|
||||
@@ -90,3 +90,40 @@ class EroProfileIE(InfoExtractor):
|
||||
'title': title,
|
||||
'age_limit': 18,
|
||||
})
|
||||
|
||||
|
||||
class EroProfileAlbumIE(InfoExtractor):
    """Playlist extractor for EroProfile video albums.

    Yields one EroProfileIE url result per video link found on the album's
    first page and on the further album pages linked from it.
    """
    _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/album/(?P<id>[^/]+)'
    IE_NAME = 'EroProfile:album'

    _TESTS = [{
        'url': 'https://www.eroprofile.com/m/videos/album/BBW-2-893',
        'info_dict': {
            'id': 'BBW-2-893',
            'title': 'BBW 2'
        },
        'playlist_mincount': 486,
    },
    ]

    def _extract_from_page(self, page):
        """Yield a url result for every video view link in the page HTML."""
        for url in re.findall(r'href=".*?(/m/videos/view/[^"]+)"', page):
            yield self.url_result(f'https://www.eroprofile.com{url}', EroProfileIE.ie_key())

    def _entries(self, playlist_id, first_page):
        """Yield the entries of the first page, then of each further album page.

        playlist_id -- album id as matched by _VALID_URL
        first_page  -- HTML of the already downloaded first album page
        """
        yield from self._extract_from_page(first_page)

        # The id is captured with [^/]+ and may therefore contain regex
        # metacharacters; escape it before embedding it in the pattern.
        album_id_re = re.escape(playlist_id)
        page_urls = re.findall(
            rf'href=".*?(/m/videos/album/{album_id_re}\?pnum=(\d+))"', first_page)

        # The first pagination match is skipped; presumably it points at the
        # page that was already processed above -- TODO confirm against the site.
        for url, n in page_urls[1:]:
            yield from self._extract_from_page(self._download_webpage(
                f'https://www.eroprofile.com{url}',
                playlist_id, note=f'Downloading playlist page {int(n) - 1}'))

    def _real_extract(self, url):
        """Download the album page and return a playlist result for it."""
        playlist_id = self._match_id(url)
        first_page = self._download_webpage(url, playlist_id, note='Downloading playlist')
        playlist_title = self._search_regex(
            r'<title>Album: (.*) - EroProfile</title>', first_page, 'playlist_title')

        return self.playlist_result(self._entries(playlist_id, first_page), playlist_id, playlist_title)
|
||||
|
||||
@@ -109,7 +109,12 @@ from .awaan import (
|
||||
from .azmedien import AZMedienIE
|
||||
from .baidu import BaiduVideoIE
|
||||
from .bandaichannel import BandaiChannelIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
|
||||
from .bandcamp import (
|
||||
BandcampIE,
|
||||
BandcampAlbumIE,
|
||||
BandcampWeeklyIE,
|
||||
BandcampMusicIE,
|
||||
)
|
||||
from .bbc import (
|
||||
BBCCoUkIE,
|
||||
BBCCoUkArticleIE,
|
||||
@@ -151,6 +156,7 @@ from .bitwave import (
|
||||
BitwaveStreamIE,
|
||||
)
|
||||
from .biqle import BIQLEIE
|
||||
from .blackboardcollaborate import BlackboardCollaborateIE
|
||||
from .bleacherreport import (
|
||||
BleacherReportIE,
|
||||
BleacherReportCMSIE,
|
||||
@@ -202,10 +208,7 @@ from .cbc import (
|
||||
CBCWatchIE,
|
||||
CBCOlympicsIE,
|
||||
)
|
||||
from .cbs import (
|
||||
CBSIE,
|
||||
ParamountPlusSeriesIE,
|
||||
)
|
||||
from .cbs import CBSIE
|
||||
from .cbslocal import (
|
||||
CBSLocalIE,
|
||||
CBSLocalArticleIE,
|
||||
@@ -386,7 +389,10 @@ from .elpais import ElPaisIE
|
||||
from .embedly import EmbedlyIE
|
||||
from .engadget import EngadgetIE
|
||||
from .eporner import EpornerIE
|
||||
from .eroprofile import EroProfileIE
|
||||
from .eroprofile import (
|
||||
EroProfileIE,
|
||||
EroProfileAlbumIE,
|
||||
)
|
||||
from .escapist import EscapistIE
|
||||
from .espn import (
|
||||
ESPNIE,
|
||||
@@ -735,6 +741,10 @@ from .minds import (
|
||||
from .ministrygrid import MinistryGridIE
|
||||
from .minoto import MinotoIE
|
||||
from .miomio import MioMioIE
|
||||
from .mirrativ import (
|
||||
MirrativIE,
|
||||
MirrativUserIE,
|
||||
)
|
||||
from .mit import TechTVMITIE, OCWMITIE
|
||||
from .mitele import MiTeleIE
|
||||
from .mixcloud import (
|
||||
@@ -936,6 +946,10 @@ from .ooyala import (
|
||||
OoyalaIE,
|
||||
OoyalaExternalIE,
|
||||
)
|
||||
from .openrec import (
|
||||
OpenRecIE,
|
||||
OpenRecCaptureIE,
|
||||
)
|
||||
from .ora import OraTVIE
|
||||
from .orf import (
|
||||
ORFTVthekIE,
|
||||
@@ -965,6 +979,10 @@ from .palcomp3 import (
|
||||
PalcoMP3VideoIE,
|
||||
)
|
||||
from .pandoratv import PandoraTVIE
|
||||
from .paramountplus import (
|
||||
ParamountPlusIE,
|
||||
ParamountPlusSeriesIE,
|
||||
)
|
||||
from .parliamentliveuk import ParliamentLiveUKIE
|
||||
from .parlview import ParlviewIE
|
||||
from .patreon import PatreonIE
|
||||
@@ -1077,6 +1095,7 @@ from .rcs import (
|
||||
from .rcti import (
|
||||
RCTIPlusIE,
|
||||
RCTIPlusSeriesIE,
|
||||
RCTIPlusTVIE,
|
||||
)
|
||||
from .rds import RDSIE
|
||||
from .redbulltv import (
|
||||
|
||||
@@ -32,7 +32,7 @@ class HotStarBaseIE(InfoExtractor):
|
||||
auth = 'st=%d~exp=%d~acl=/*' % (st, exp)
|
||||
auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest()
|
||||
|
||||
if cookies.get('userUP'):
|
||||
if cookies and cookies.get('userUP'):
|
||||
token = cookies.get('userUP').value
|
||||
else:
|
||||
token = self._download_json(
|
||||
@@ -59,9 +59,12 @@ class HotStarBaseIE(InfoExtractor):
|
||||
return response['data']
|
||||
|
||||
def _call_api(self, path, video_id, query_name='contentId'):
|
||||
return self._call_api_impl(path, video_id, {
|
||||
return self._download_json('https://api.hotstar.com/' + path, video_id=video_id, query={
|
||||
query_name: video_id,
|
||||
'tas': 10000,
|
||||
}, headers={
|
||||
'x-country-code': 'IN',
|
||||
'x-platform-code': 'PCTV',
|
||||
})
|
||||
|
||||
def _call_api_v2(self, path, video_id, st=None, cookies=None):
|
||||
@@ -77,15 +80,25 @@ class HotStarBaseIE(InfoExtractor):
|
||||
class HotStarIE(HotStarBaseIE):
|
||||
IE_NAME = 'hotstar'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?hotstar\.com(?:/in)?/(?!in/)
|
||||
(?:
|
||||
tv/(?:[^/?#]+/){3}|
|
||||
(?!tv/)[^?#]+/
|
||||
)?
|
||||
(?P<id>\d{10})
|
||||
(?:
|
||||
hotstar\:|
|
||||
https?://(?:www\.)?hotstar\.com(?:/in)?/(?!in/)
|
||||
)
|
||||
(?:
|
||||
(?P<type>movies|sports|episode|(?P<tv>tv))
|
||||
(?:
|
||||
\:|
|
||||
/[^/?#]+/
|
||||
(?(tv)
|
||||
(?:[^/?#]+/){2}|
|
||||
(?:[^/?#]+/)*
|
||||
)
|
||||
)|
|
||||
[^/?#]+/
|
||||
)?
|
||||
(?P<id>\d{10})
|
||||
'''
|
||||
_TESTS = [{
|
||||
# contentData
|
||||
'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273',
|
||||
'info_dict': {
|
||||
'id': '1000076273',
|
||||
@@ -96,54 +109,87 @@ class HotStarIE(HotStarBaseIE):
|
||||
'upload_date': '20151111',
|
||||
'duration': 381,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# contentDetail
|
||||
'url': 'hotstar:1000076273',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157',
|
||||
'info_dict': {
|
||||
'id': '1000057157',
|
||||
'ext': 'mp4',
|
||||
'title': 'Radha Gopalam',
|
||||
'description': 'md5:be3bc342cc120bbc95b3b0960e2b0d22',
|
||||
'timestamp': 1140805800,
|
||||
'upload_date': '20060224',
|
||||
'duration': 9182,
|
||||
},
|
||||
}, {
|
||||
'url': 'hotstar:movies:1000057157',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.hotstar.com/sports/cricket/rajitha-sizzles-on-debut-with-329/2001477583',
|
||||
'url': 'https://www.hotstar.com/in/sports/cricket/follow-the-blues-2021/recap-eng-fight-back-on-day-2/1260066104',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.hotstar.com/1000000515',
|
||||
'url': 'https://www.hotstar.com/in/sports/football/most-costly-pl-transfers-ft-grealish/1260065956',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# contentData
|
||||
'url': 'hotstar:sports:1260065956',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# contentData
|
||||
'url': 'hotstar:sports:1260066104',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# only available via api v2
|
||||
'url': 'https://www.hotstar.com/tv/ek-bhram-sarvagun-sampanna/s-2116/janhvi-targets-suman/1000234847',
|
||||
'info_dict': {
|
||||
'id': '1000234847',
|
||||
'ext': 'mp4',
|
||||
'title': 'Janhvi Targets Suman',
|
||||
'description': 'md5:78a85509348910bd1ca31be898c5796b',
|
||||
'timestamp': 1556670600,
|
||||
'upload_date': '20190501',
|
||||
'duration': 1219,
|
||||
'channel': 'StarPlus',
|
||||
'channel_id': 3,
|
||||
'series': 'Ek Bhram - Sarvagun Sampanna',
|
||||
'season': 'Chapter 1',
|
||||
'season_number': 1,
|
||||
'season_id': 6771,
|
||||
'episode': 'Janhvi Targets Suman',
|
||||
'episode_number': 8,
|
||||
},
|
||||
}, {
|
||||
'url': 'hotstar:episode:1000234847',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_BYPASS = False
|
||||
_TYPE = {
|
||||
'movies': 'movie',
|
||||
'sports': 'match',
|
||||
'episode': 'episode',
|
||||
'tv': 'episode',
|
||||
None: 'content',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_type = mobj.group('type')
|
||||
cookies = self._get_cookies(url)
|
||||
webpage, urlh = self._download_webpage_handle(url, video_id)
|
||||
st = urlh.headers.get('x-origin-date')
|
||||
app_state = self._parse_json(self._search_regex(
|
||||
r'<script>window\.APP_STATE\s*=\s*({.+?})</script>',
|
||||
webpage, 'app state'), video_id)
|
||||
video_data = {}
|
||||
getters = list(
|
||||
lambda x, k=k: x['initialState']['content%s' % k]['content']
|
||||
for k in ('Data', 'Detail')
|
||||
)
|
||||
for v in app_state.values():
|
||||
content = try_get(v, getters, dict)
|
||||
if content and content.get('contentId') == video_id:
|
||||
video_data = content
|
||||
break
|
||||
|
||||
video_type = self._TYPE.get(video_type, video_type)
|
||||
video_data = self._call_api(f'o/v1/{video_type}/detail', video_id)['body']['results']['item']
|
||||
title = video_data['title']
|
||||
|
||||
if not self.get_param('allow_unplayable_formats') and video_data.get('drmProtected'):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
headers = {'Referer': url}
|
||||
headers = {'Referer': 'https://www.hotstar.com/in'}
|
||||
formats = []
|
||||
subs = {}
|
||||
geo_restricted = False
|
||||
_, urlh = self._download_webpage_handle('https://www.hotstar.com/in', video_id)
|
||||
# Required to fix https://github.com/yt-dlp/yt-dlp/issues/396
|
||||
st = urlh.headers.get('x-origin-date')
|
||||
# change to v2 in the future
|
||||
playback_sets = self._call_api_v2('play/v1/playback', video_id, st=st, cookies=cookies)['playBackSets']
|
||||
for playback_set in playback_sets:
|
||||
@@ -160,13 +206,17 @@ class HotStarIE(HotStarBaseIE):
|
||||
ext = determine_ext(format_url)
|
||||
try:
|
||||
if 'package:hls' in tags or ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
hls_formats, hls_subs = self._extract_m3u8_formats_and_subtitles(
|
||||
format_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', headers=headers))
|
||||
m3u8_id='hls', headers=headers)
|
||||
formats.extend(hls_formats)
|
||||
subs = self._merge_subtitles(subs, hls_subs)
|
||||
elif 'package:dash' in tags or ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, video_id, mpd_id='dash', headers=headers))
|
||||
dash_formats, dash_subs = self._extract_mpd_formats_and_subtitles(
|
||||
format_url, video_id, mpd_id='dash', headers=headers)
|
||||
formats.extend(dash_formats)
|
||||
subs = self._merge_subtitles(subs, dash_subs)
|
||||
elif ext == 'f4m':
|
||||
# produce broken files
|
||||
pass
|
||||
@@ -194,6 +244,7 @@ class HotStarIE(HotStarBaseIE):
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'timestamp': int_or_none(video_data.get('broadcastDate') or video_data.get('startDate')),
|
||||
'formats': formats,
|
||||
'subtitles': subs,
|
||||
'channel': video_data.get('channelName'),
|
||||
'channel_id': video_data.get('channelId'),
|
||||
'series': video_data.get('showName'),
|
||||
@@ -222,8 +273,7 @@ class HotStarPlaylistIE(HotStarBaseIE):
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
collection = self._call_api('o/v1/tray/find', playlist_id, 'uqId')
|
||||
|
||||
collection = self._call_api('o/v1/tray/find', playlist_id, 'uqId')['body']['results']
|
||||
entries = [
|
||||
self.url_result(
|
||||
'https://www.hotstar.com/%s' % video['contentId'],
|
||||
@@ -270,7 +320,7 @@ class HotStarSeriesIE(HotStarBaseIE):
|
||||
video_id=series_id, headers=headers)
|
||||
entries = [
|
||||
self.url_result(
|
||||
'https://www.hotstar.com/%d' % video['contentId'],
|
||||
'hotstar:episode:%d' % video['contentId'],
|
||||
ie=HotStarIE.ie_key(), video_id=video['contentId'])
|
||||
for video in item_json['body']['results']['items']
|
||||
if video.get('contentId')]
|
||||
|
||||
@@ -195,18 +195,23 @@ class InstagramIE(InfoExtractor):
|
||||
lambda x: x['%ss' % kind]['count'])))
|
||||
if count is not None:
|
||||
return count
|
||||
|
||||
like_count = get_count('preview_like', 'like')
|
||||
comment_count = get_count(
|
||||
('preview_comment', 'to_comment', 'to_parent_comment'), 'comment')
|
||||
|
||||
comments = [{
|
||||
'author': comment.get('user', {}).get('username'),
|
||||
'author_id': comment.get('user', {}).get('id'),
|
||||
'id': comment.get('id'),
|
||||
'text': comment.get('text'),
|
||||
'timestamp': int_or_none(comment.get('created_at')),
|
||||
} for comment in media.get(
|
||||
'comments', {}).get('nodes', []) if comment.get('text')]
|
||||
comments = []
|
||||
for comment in try_get(media, lambda x: x['edge_media_to_parent_comment']['edges']):
|
||||
comment_dict = comment.get('node', {})
|
||||
comment_text = comment_dict.get('text')
|
||||
if comment_text:
|
||||
comments.append({
|
||||
'author': try_get(comment_dict, lambda x: x['owner']['username']),
|
||||
'author_id': try_get(comment_dict, lambda x: x['owner']['id']),
|
||||
'id': comment_dict.get('id'),
|
||||
'text': comment_text,
|
||||
'timestamp': int_or_none(comment_dict.get('created_at')),
|
||||
})
|
||||
if not video_url:
|
||||
edges = try_get(
|
||||
media, lambda x: x['edge_sidecar_to_children']['edges'],
|
||||
|
||||
134
yt_dlp/extractor/mirrativ.py
Normal file
134
yt_dlp/extractor/mirrativ.py
Normal file
@@ -0,0 +1,134 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
dict_get,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class MirrativBaseIE(InfoExtractor):
    """Common base for the Mirrativ extractors; provides API error checking."""

    def assert_error(self, response):
        """Raise an expected ExtractorError when *response* reports an error.

        The error text is looked up under the ('status', 'error') path of the
        response; a falsy or absent value means there is nothing to report.
        """
        reported = traverse_obj(response, ('status', 'error'))
        if not reported:
            return
        raise ExtractorError('Mirrativ says: %s' % reported, expected=True)
|
||||
|
||||
|
||||
class MirrativIE(MirrativBaseIE):
    """Extractor for a single Mirrativ live page (live stream or its archive)."""

    IE_NAME = 'mirrativ'
    _VALID_URL = r'https?://(?:www\.)?mirrativ\.com/live/(?P<id>[^/?#&]+)'
    LIVE_API_URL = 'https://www.mirrativ.com/api/live/live?live_id=%s'

    # Named _TESTS like every other extractor in this file; the previous
    # spelling (TESTS) presumably kept this case from being collected.
    _TESTS = [{
        'url': 'https://mirrativ.com/live/POxyuG1KmW2982lqlDTuPw',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for one live id.

        Downloads the live page and the live API JSON, prefers the archive HLS
        URL over the streaming one, and adds an extra RTMP format when the API
        exposes ``streaming_url_edge``.

        Raises ExtractorError (expected) when the API reports an error or when
        neither an archive nor a live HLS URL is present.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage('https://www.mirrativ.com/live/%s' % video_id, video_id)
        live_response = self._download_json(self.LIVE_API_URL % video_id, video_id)
        self.assert_error(live_response)

        hls_url = dict_get(live_response, ('archive_url_hls', 'streaming_url_hls'))
        is_live = bool(live_response.get('is_live'))
        was_live = bool(live_response.get('is_archive'))
        if not hls_url:
            raise ExtractorError('Neither archive nor live is available.', expected=True)

        formats = self._extract_m3u8_formats(
            hls_url, video_id,
            ext='mp4', entry_protocol='m3u8_native',
            m3u8_id='hls', live=is_live)
        rtmp_url = live_response.get('streaming_url_edge')
        if rtmp_url:
            # The RTMP entry carries no media details of its own, so borrow
            # them from the first HLS format (missing keys become None).
            keys_to_copy = ('width', 'height', 'vcodec', 'acodec', 'tbr')
            fmt = {
                'format_id': 'rtmp',
                'url': rtmp_url,
                'protocol': 'rtmp',
                'ext': 'mp4',
            }
            fmt.update({k: traverse_obj(formats, (0, k)) for k in keys_to_copy})
            formats.append(fmt)
        self._sort_formats(formats)

        # Title fallbacks: og:title, then the <title> tag, then the API title.
        # The regex lookup needs a field name and default=None, otherwise a
        # page without a matching <title> errors out before the API title
        # is ever considered.
        title = (
            self._og_search_title(webpage, default=None)
            or self._search_regex(
                r'<title>\s*(.+?) - Mirrativ\s*</title>', webpage,
                'title', default=None)
            or live_response.get('title'))
        description = live_response.get('description')
        thumbnail = live_response.get('image_url')

        # try_get swallows the lookup/arithmetic failure when either
        # timestamp is absent, leaving duration as None.
        duration = try_get(live_response, lambda x: x['ended_at'] - x['started_at'])
        view_count = live_response.get('total_viewer_num')
        release_timestamp = live_response.get('started_at')
        timestamp = live_response.get('created_at')

        # `or {}` also covers an explicit null owner, which a .get() default
        # alone would not.
        owner = live_response.get('owner') or {}
        uploader = owner.get('name')
        uploader_id = owner.get('user_id')

        return {
            'id': video_id,
            'title': title,
            'is_live': is_live,
            'description': description,
            'formats': formats,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'duration': duration,
            'view_count': view_count,
            'release_timestamp': release_timestamp,
            'timestamp': timestamp,
            'was_live': was_live,
        }
|
||||
|
||||
|
||||
class MirrativUserIE(MirrativBaseIE):
    """Playlist extractor listing the available lives/archives of a Mirrativ user."""

    IE_NAME = 'mirrativ:user'
    _VALID_URL = r'https?://(?:www\.)?mirrativ\.com/user/(?P<id>\d+)'
    LIVE_HISTORY_API_URL = 'https://www.mirrativ.com/api/live/live_history?user_id=%s&page=%d'
    USER_INFO_API_URL = 'https://www.mirrativ.com/api/user/profile?user_id=%s'

    _TESTS = [{
        # Live archive is available up to 3 days
        # see: https://helpfeel.com/mirrativ/%E9%8C%B2%E7%94%BB-5e26d3ad7b59ef0017fb49ac (Japanese)
        'url': 'https://www.mirrativ.com/user/110943130',
        'note': 'multiple archives available',
        'only_matching': True,
    }]

    def _entries(self, user_id):
        """Yield url_result entries for each playable live in the user's history.

        Pages are followed through the ``next_page`` field of each API
        response until it is absent or a page comes back without lives.
        """
        page = 1
        while page is not None:
            api_response = self._download_json(
                self.LIVE_HISTORY_API_URL % (user_id, page), user_id,
                note='Downloading page %d' % page)
            self.assert_error(api_response)
            lives = api_response.get('lives')
            if not lives:
                break
            for live in lives:
                if not live.get('is_archive') and not live.get('is_live'):
                    # neither archive nor live is available, so skip it
                    # or the service will ban your IP address for a while
                    continue
                live_id = live.get('live_id')
                if not live_id:
                    # Without an id the URL would become ".../live/None";
                    # skip the entry instead of queueing a dead link.
                    continue
                url = 'https://www.mirrativ.com/live/%s' % live_id
                yield self.url_result(url, video_id=live_id, video_title=live.get('title'))
            page = api_response.get('next_page')

    def _real_extract(self, url):
        """Return a playlist of the user's lives, titled with the user's name."""
        user_id = self._match_id(url)
        # The profile request is non-fatal, so a failed download must not
        # break the .get() lookups below; fall back to an empty mapping.
        user_info = self._download_json(
            self.USER_INFO_API_URL % user_id, user_id,
            note='Downloading user info', fatal=False) or {}
        self.assert_error(user_info)

        uploader = user_info.get('name')
        description = user_info.get('description')

        entries = self._entries(user_id)
        return self.playlist_result(entries, user_id, uploader, description)
|
||||
@@ -12,6 +12,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
RegexNotFoundError,
|
||||
smuggle_url,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
@@ -460,7 +461,7 @@ class NBCNewsIE(ThePlatformIE):
|
||||
|
||||
class NBCOlympicsIE(InfoExtractor):
|
||||
IE_NAME = 'nbcolympics'
|
||||
_VALID_URL = r'https?://www\.nbcolympics\.com/video/(?P<id>[a-z-]+)'
|
||||
_VALID_URL = r'https?://www\.nbcolympics\.com/videos?/(?P<id>[0-9a-z-]+)'
|
||||
|
||||
_TEST = {
|
||||
# Geo-restricted to US
|
||||
@@ -483,13 +484,18 @@ class NBCOlympicsIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
drupal_settings = self._parse_json(self._search_regex(
|
||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
||||
webpage, 'drupal settings'), display_id)
|
||||
try:
|
||||
drupal_settings = self._parse_json(self._search_regex(
|
||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
||||
webpage, 'drupal settings'), display_id)
|
||||
|
||||
iframe_url = drupal_settings['vod']['iframe_url']
|
||||
theplatform_url = iframe_url.replace(
|
||||
'vplayer.nbcolympics.com', 'player.theplatform.com')
|
||||
iframe_url = drupal_settings['vod']['iframe_url']
|
||||
theplatform_url = iframe_url.replace(
|
||||
'vplayer.nbcolympics.com', 'player.theplatform.com')
|
||||
except RegexNotFoundError:
|
||||
theplatform_url = self._search_regex(
|
||||
r"([\"'])embedUrl\1: *([\"'])(?P<embedUrl>.+)\2",
|
||||
webpage, 'embedding URL', group="embedUrl")
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
@@ -502,43 +508,79 @@ class NBCOlympicsIE(InfoExtractor):
|
||||
class NBCOlympicsStreamIE(AdobePassIE):
|
||||
IE_NAME = 'nbcolympics:stream'
|
||||
_VALID_URL = r'https?://stream\.nbcolympics\.com/(?P<id>[0-9a-z-]+)'
|
||||
_TEST = {
|
||||
'url': 'http://stream.nbcolympics.com/2018-winter-olympics-nbcsn-evening-feb-8',
|
||||
'info_dict': {
|
||||
'id': '203493',
|
||||
'ext': 'mp4',
|
||||
'title': 're:Curling, Alpine, Luge [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
_TESTS = [
|
||||
{
|
||||
'note': 'Tokenized m3u8 source URL',
|
||||
'url': 'https://stream.nbcolympics.com/womens-soccer-group-round-11',
|
||||
'info_dict': {
|
||||
'id': '2019740',
|
||||
'ext': 'mp4',
|
||||
'title': r"re:Women's Group Stage - Netherlands vs\. Brazil [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$",
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
}, {
|
||||
'note': 'Plain m3u8 source URL',
|
||||
'url': 'https://stream.nbcolympics.com/gymnastics-event-finals-mens-floor-pommel-horse-womens-vault-bars',
|
||||
'info_dict': {
|
||||
'id': '2021729',
|
||||
'ext': 'mp4',
|
||||
'title': r're:Event Finals: M Floor, W Vault, M Pommel, W Uneven Bars [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
_DATA_URL_TEMPLATE = 'http://stream.nbcolympics.com/data/%s_%s.json'
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
pid = self._search_regex(r'pid\s*=\s*(\d+);', webpage, 'pid')
|
||||
resource = self._search_regex(
|
||||
r"resource\s*=\s*'(.+)';", webpage,
|
||||
'resource').replace("' + pid + '", pid)
|
||||
|
||||
event_config = self._download_json(
|
||||
self._DATA_URL_TEMPLATE % ('event_config', pid),
|
||||
pid)['eventConfig']
|
||||
title = self._live_title(event_config['eventTitle'])
|
||||
f'http://stream.nbcolympics.com/data/event_config_{pid}.json',
|
||||
pid, 'Downloading event config')['eventConfig']
|
||||
|
||||
title = event_config['eventTitle']
|
||||
is_live = {'live': True, 'replay': False}.get(event_config.get('eventStatus'))
|
||||
if is_live:
|
||||
title = self._live_title(title)
|
||||
|
||||
source_url = self._download_json(
|
||||
self._DATA_URL_TEMPLATE % ('live_sources', pid),
|
||||
pid)['videoSources'][0]['sourceUrl']
|
||||
media_token = self._extract_mvpd_auth(
|
||||
url, pid, event_config.get('requestorId', 'NBCOlympics'), resource)
|
||||
formats = self._extract_m3u8_formats(self._download_webpage(
|
||||
'http://sp.auth.adobe.com/tvs/v1/sign', pid, query={
|
||||
'cdn': 'akamai',
|
||||
'mediaToken': base64.b64encode(media_token.encode()),
|
||||
'resource': base64.b64encode(resource.encode()),
|
||||
'url': source_url,
|
||||
}), pid, 'mp4')
|
||||
f'https://api-leap.nbcsports.com/feeds/assets/{pid}?application=NBCOlympics&platform=desktop&format=nbc-player&env=staging',
|
||||
pid, 'Downloading leap config'
|
||||
)['videoSources'][0]['cdnSources']['primary'][0]['sourceUrl']
|
||||
|
||||
if event_config.get('cdnToken'):
|
||||
ap_resource = self._get_mvpd_resource(
|
||||
event_config.get('resourceId', 'NBCOlympics'),
|
||||
re.sub(r'[^\w\d ]+', '', event_config['eventTitle']), pid,
|
||||
event_config.get('ratingId', 'NO VALUE'))
|
||||
media_token = self._extract_mvpd_auth(url, pid, event_config.get('requestorId', 'NBCOlympics'), ap_resource)
|
||||
|
||||
source_url = self._download_json(
|
||||
'https://tokens.playmakerservices.com/', pid, 'Retrieving tokenized URL',
|
||||
data=json.dumps({
|
||||
'application': 'NBCSports',
|
||||
'authentication-type': 'adobe-pass',
|
||||
'cdn': 'akamai',
|
||||
'pid': pid,
|
||||
'platform': 'desktop',
|
||||
'requestorId': 'NBCOlympics',
|
||||
'resourceId': base64.b64encode(ap_resource.encode()).decode(),
|
||||
'token': base64.b64encode(media_token.encode()).decode(),
|
||||
'url': source_url,
|
||||
'version': 'v1',
|
||||
}).encode(),
|
||||
)['akamai'][0]['tokenizedUrl']
|
||||
|
||||
formats = self._extract_m3u8_formats(source_url, pid, 'mp4', live=is_live)
|
||||
for f in formats:
|
||||
# -http_seekable requires ffmpeg 4.3+ but it doesn't seem possible to
|
||||
# download with ffmpeg without this option
|
||||
f['_ffmpeg_args'] = ['-seekable', '0', '-http_seekable', '0', '-icy', '0']
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
@@ -546,5 +588,5 @@ class NBCOlympicsStreamIE(AdobePassIE):
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'is_live': True,
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
||||
@@ -4,9 +4,9 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
parse_count,
|
||||
parse_duration,
|
||||
parse_filesize,
|
||||
unified_timestamp,
|
||||
@@ -14,18 +14,19 @@ from ..utils import (
|
||||
|
||||
|
||||
class NewgroundsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:audio/listen|portal/view)/(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:audio/listen|portal/view)/(?P<id>\d+)(?:/format/flash)?'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.newgrounds.com/audio/listen/549479',
|
||||
'md5': 'fe6033d297591288fa1c1f780386f07a',
|
||||
'info_dict': {
|
||||
'id': '549479',
|
||||
'ext': 'mp3',
|
||||
'title': 'Burn7 - B7 - BusMode',
|
||||
'title': 'B7 - BusMode',
|
||||
'uploader': 'Burn7',
|
||||
'timestamp': 1378878540,
|
||||
'upload_date': '20130911',
|
||||
'duration': 143,
|
||||
'description': 'md5:6d885138814015dfd656c2ddb00dacfc',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.newgrounds.com/portal/view/1',
|
||||
@@ -33,10 +34,11 @@ class NewgroundsIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '1',
|
||||
'ext': 'mp4',
|
||||
'title': 'Brian-Beaton - Scrotum 1',
|
||||
'title': 'Scrotum 1',
|
||||
'uploader': 'Brian-Beaton',
|
||||
'timestamp': 955064100,
|
||||
'upload_date': '20000406',
|
||||
'description': 'Scrotum plays "catch."',
|
||||
},
|
||||
}, {
|
||||
# source format unavailable, additional mp4 formats
|
||||
@@ -44,14 +46,39 @@ class NewgroundsIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '689400',
|
||||
'ext': 'mp4',
|
||||
'title': 'Bennettthesage - ZTV News Episode 8',
|
||||
'uploader': 'BennettTheSage',
|
||||
'title': 'ZTV News Episode 8',
|
||||
'uploader': 'ZONE-SAMA',
|
||||
'timestamp': 1487965140,
|
||||
'upload_date': '20170224',
|
||||
'description': 'ZTV News Episode 8 (February 2017)',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.newgrounds.com/portal/view/297383',
|
||||
'md5': '2c11f5fd8cb6b433a63c89ba3141436c',
|
||||
'info_dict': {
|
||||
'id': '297383',
|
||||
'ext': 'mp4',
|
||||
'title': 'Metal Gear Awesome',
|
||||
'uploader': 'Egoraptor',
|
||||
'timestamp': 1140663240,
|
||||
'upload_date': '20060223',
|
||||
'description': 'Metal Gear is awesome is so is this movie.',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.newgrounds.com/portal/view/297383/format/flash',
|
||||
'md5': '5d05585a9a0caca059f5abfbd3865524',
|
||||
'info_dict': {
|
||||
'id': '297383',
|
||||
'ext': 'swf',
|
||||
'title': 'Metal Gear Awesome',
|
||||
'description': 'Metal Gear is awesome is so is this movie.',
|
||||
'uploader': 'Egoraptor',
|
||||
'upload_date': '20060223',
|
||||
'timestamp': 1140663240,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -73,38 +100,14 @@ class NewgroundsIE(InfoExtractor):
|
||||
'format_id': 'source',
|
||||
'quality': 1,
|
||||
}]
|
||||
|
||||
max_resolution = int_or_none(self._search_regex(
|
||||
r'max_resolution["\']\s*:\s*(\d+)', webpage, 'max resolution',
|
||||
default=None))
|
||||
if max_resolution:
|
||||
url_base = media_url.rpartition('.')[0]
|
||||
for resolution in (360, 720, 1080):
|
||||
if resolution > max_resolution:
|
||||
break
|
||||
formats.append({
|
||||
'url': '%s.%dp.mp4' % (url_base, resolution),
|
||||
'format_id': '%dp' % resolution,
|
||||
'height': resolution,
|
||||
})
|
||||
else:
|
||||
video_id = int_or_none(self._search_regex(
|
||||
r'data-movie-id=\\"([0-9]+)\\"', webpage, ''))
|
||||
if not video_id:
|
||||
raise ExtractorError('Could not extract media data')
|
||||
|
||||
url_video_data = 'https://www.newgrounds.com/portal/video/%s' % video_id
|
||||
headers = {
|
||||
json_video = self._download_json('https://www.newgrounds.com/portal/video/' + media_id, media_id, headers={
|
||||
'Accept': 'application/json',
|
||||
'Referer': url,
|
||||
'X-Requested-With': 'XMLHttpRequest'
|
||||
}
|
||||
json_video = self._download_json(url_video_data, video_id, headers=headers, fatal=False)
|
||||
if not json_video:
|
||||
raise ExtractorError('Could not fetch media data')
|
||||
})
|
||||
|
||||
uploader = json_video.get('author')
|
||||
title = json_video.get('title')
|
||||
media_formats = json_video.get('sources', [])
|
||||
for media_format in media_formats:
|
||||
media_sources = media_formats[media_format]
|
||||
@@ -115,9 +118,6 @@ class NewgroundsIE(InfoExtractor):
|
||||
'url': source.get('src')
|
||||
})
|
||||
|
||||
self._check_formats(formats, media_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
if not uploader:
|
||||
uploader = self._html_search_regex(
|
||||
(r'(?s)<h4[^>]*>(.+?)</h4>.*?<em>\s*(?:Author|Artist)\s*</em>',
|
||||
@@ -132,6 +132,9 @@ class NewgroundsIE(InfoExtractor):
|
||||
r'(?s)<dd>\s*Song\s*</dd>\s*<dd>.+?</dd>\s*<dd>([^<]+)', webpage,
|
||||
'duration', default=None))
|
||||
|
||||
view_count = parse_count(self._html_search_regex(r'(?s)<dt>\s*Views\s*</dt>\s*<dd>([\d\.,]+)</dd>', webpage,
|
||||
'view_count', fatal=False, default=None))
|
||||
|
||||
filesize_approx = parse_filesize(self._html_search_regex(
|
||||
r'(?s)<dd>\s*Song\s*</dd>\s*<dd>(.+?)</dd>', webpage, 'filesize',
|
||||
default=None))
|
||||
@@ -140,9 +143,8 @@ class NewgroundsIE(InfoExtractor):
|
||||
|
||||
if '<dd>Song' in webpage:
|
||||
formats[0]['vcodec'] = 'none'
|
||||
|
||||
if uploader:
|
||||
title = "%s - %s" % (uploader, title)
|
||||
self._check_formats(formats, media_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': media_id,
|
||||
@@ -151,6 +153,9 @@ class NewgroundsIE(InfoExtractor):
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'view_count': view_count,
|
||||
}
|
||||
|
||||
|
||||
@@ -162,14 +167,14 @@ class NewgroundsPlaylistIE(InfoExtractor):
|
||||
'id': 'cats',
|
||||
'title': 'Cats',
|
||||
},
|
||||
'playlist_mincount': 46,
|
||||
'playlist_mincount': 45,
|
||||
}, {
|
||||
'url': 'http://www.newgrounds.com/portal/search/author/ZONE-SAMA',
|
||||
'url': 'https://www.newgrounds.com/collection/dogs',
|
||||
'info_dict': {
|
||||
'id': 'ZONE-SAMA',
|
||||
'title': 'Portal Search: ZONE-SAMA',
|
||||
'id': 'dogs',
|
||||
'title': 'Dogs',
|
||||
},
|
||||
'playlist_mincount': 47,
|
||||
'playlist_mincount': 26,
|
||||
}, {
|
||||
'url': 'http://www.newgrounds.com/audio/search/title/cats',
|
||||
'only_matching': True,
|
||||
@@ -190,7 +195,7 @@ class NewgroundsPlaylistIE(InfoExtractor):
|
||||
|
||||
entries = []
|
||||
for a, path, media_id in re.findall(
|
||||
r'(<a[^>]+\bhref=["\']/?((?:portal/view|audio/listen)/(\d+))[^>]+>)',
|
||||
r'(<a[^>]+\bhref=["\'][^"\']+((?:portal/view|audio/listen)/(\d+))[^>]+>)',
|
||||
webpage):
|
||||
a_class = extract_attributes(a).get('class')
|
||||
if a_class not in ('item-portalsubmission', 'item-audiosubmission'):
|
||||
|
||||
@@ -13,16 +13,16 @@ from ..compat import (
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
dict_get,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
OnDemandPagedList,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
PostProcessingError,
|
||||
str_or_none,
|
||||
remove_start,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
urlencode_postdata,
|
||||
|
||||
126
yt_dlp/extractor/openrec.py
Normal file
126
yt_dlp/extractor/openrec.py
Normal file
@@ -0,0 +1,126 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unified_strdate
|
||||
)
|
||||
from ..compat import compat_str
|
||||
|
||||
|
||||
class OpenRecIE(InfoExtractor):
    """Extractor for OPENREC.tv live pages (/live/<id>)."""

    IE_NAME = 'openrec'
    _VALID_URL = r'https?://(?:www\.)?openrec\.tv/live/(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'https://www.openrec.tv/live/2p8v31qe4zy',
        'only_matching': True,
    }, {
        'url': 'https://www.openrec.tv/live/wez93eqvjzl',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract the live info dict from the page's ``window.pageStore`` JSON.

        Raises ExtractorError when no movie object can be located in the
        page store.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage('https://www.openrec.tv/live/%s' % video_id, video_id)

        window_stores = self._parse_json(
            self._search_regex(r'(?m)window\.pageStore\s*=\s*(\{.+?\});$', webpage, 'window.pageStore'), video_id)
        # The movie object is looked up under two alternative paths.
        movie_store = traverse_obj(
            window_stores,
            ('v8', 'state', 'movie'),
            ('v8', 'movie'),
            expected_type=dict)
        if not movie_store:
            raise ExtractorError('Failed to extract live info')

        title = movie_store.get('title')
        description = movie_store.get('introduction')
        thumbnail = movie_store.get('thumbnailUrl')

        # try_get tolerates a missing or null 'channel', which chained
        # .get() calls would not.
        channel_user = try_get(movie_store, lambda x: x['channel']['user'], dict)
        uploader = try_get(channel_user, lambda x: x['name'], compat_str)
        uploader_id = try_get(channel_user, lambda x: x['id'], compat_str)

        timestamp = traverse_obj(movie_store, ('startedAt', 'time'), expected_type=int)

        # A missing or null 'media' entry is treated as "no playlists"
        # instead of failing on None.items().
        m3u8_playlists = movie_store.get('media') or {}
        formats = []
        for name, m3u8_url in m3u8_playlists.items():
            if not m3u8_url:
                continue
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, ext='mp4', entry_protocol='m3u8',
                m3u8_id='hls-%s' % name, live=True))

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'formats': formats,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'timestamp': timestamp,
            'is_live': True,
        }
|
||||
|
||||
|
||||
class OpenRecCaptureIE(InfoExtractor):
    """Extractor for OPENREC.tv capture pages (/capture/<id>)."""

    IE_NAME = 'openrec:capture'
    _VALID_URL = r'https?://(?:www\.)?openrec\.tv/capture/(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'https://www.openrec.tv/capture/l9nk2x4gn14',
        'only_matching': True,
    }, {
        'url': 'https://www.openrec.tv/capture/mldjr82p7qk',
        'info_dict': {
            'id': 'mldjr82p7qk',
            'title': 'たいじの恥ずかしい英語力',
            'uploader': 'たいちゃんねる',
            'uploader_id': 'Yaritaiji',
            'upload_date': '20210803',
        },
    }]

    def _real_extract(self, url):
        """Extract the capture info dict from the page's ``window.pageStore`` JSON.

        Raises ExtractorError when the capture object or its m3u8 source URL
        is missing from the page store.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage('https://www.openrec.tv/capture/%s' % video_id, video_id)

        window_stores = self._parse_json(
            self._search_regex(r'(?m)window\.pageStore\s*=\s*(\{.+?\});$', webpage, 'window.pageStore'), video_id)
        # Uploader details are optional metadata: a missing or null 'movie'
        # entry must not abort extraction, so default to an empty mapping.
        movie_store = window_stores.get('movie') or {}

        capture_data = window_stores.get('capture')
        if not capture_data:
            raise ExtractorError('Cannot extract title')
        title = capture_data.get('title')
        thumbnail = capture_data.get('thumbnailUrl')
        upload_date = unified_strdate(capture_data.get('createdAt'))

        channel_info = movie_store.get('channel') or {}
        uploader = channel_info.get('name')
        uploader_id = channel_info.get('id')

        m3u8_url = capture_data.get('source')
        if not m3u8_url:
            raise ExtractorError('Cannot extract m3u8 url')
        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
            m3u8_id='hls')

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'upload_date': upload_date,
        }
|
||||
145
yt_dlp/extractor/paramountplus.py
Normal file
145
yt_dlp/extractor/paramountplus.py
Normal file
@@ -0,0 +1,145 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .cbs import CBSBaseIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class ParamountPlusIE(CBSBaseIE):
    """Extractor for single Paramount+ show episodes and movies."""

    _VALID_URL = r'''(?x)
        (?:
            paramountplus:|
            https?://(?:www\.)?(?:
                paramountplus\.com/(?:shows/[^/]+/video|movies/[^/]+)/
        )(?P<id>[\w-]+))'''

    # All tests are blocked outside US
    _TESTS = [{
        'url': 'https://www.paramountplus.com/shows/catdog/video/Oe44g5_NrlgiZE3aQVONleD6vXc8kP0k/catdog-climb-every-catdog-the-canine-mutiny/',
        'info_dict': {
            'id': 'Oe44g5_NrlgiZE3aQVONleD6vXc8kP0k',
            'ext': 'mp4',
            'title': 'CatDog - Climb Every CatDog/The Canine Mutiny',
            'description': 'md5:7ac835000645a69933df226940e3c859',
            'duration': 1418,
            'timestamp': 920264400,
            'upload_date': '19990301',
            'uploader': 'CBSI-NEW',
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        'url': 'https://www.paramountplus.com/shows/tooning-out-the-news/video/6hSWYWRrR9EUTz7IEe5fJKBhYvSUfexd/7-23-21-week-in-review-rep-jahana-hayes-howard-fineman-sen-michael-bennet-sheera-frenkel-cecilia-kang-/',
        'info_dict': {
            'id': '6hSWYWRrR9EUTz7IEe5fJKBhYvSUfexd',
            'ext': 'mp4',
            'title': '7/23/21 WEEK IN REVIEW (Rep. Jahana Hayes/Howard Fineman/Sen. Michael Bennet/Sheera Frenkel & Cecilia Kang)',
            'description': 'md5:f4adcea3e8b106192022e121f1565bae',
            'duration': 2506,
            'timestamp': 1627063200,
            'upload_date': '20210723',
            'uploader': 'CBSI-NEW',
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        'url': 'https://www.paramountplus.com/movies/daddys-home/vM2vm0kE6vsS2U41VhMRKTOVHyQAr6pC',
        'info_dict': {
            'id': 'vM2vm0kE6vsS2U41VhMRKTOVHyQAr6pC',
            'ext': 'mp4',
            'title': 'Daddy\'s Home',
            'upload_date': '20151225',
            'description': 'md5:a0beaf24e8d3b0e81b2ee41d47c06f33',
            'uploader': 'CBSI-NEW',
            'timestamp': 1451030400,
        },
        'params': {
            'skip_download': 'm3u8',
            'format': 'bestvideo',
        },
        'expected_warnings': ['Ignoring subtitle tracks'],  # TODO: Investigate this
    }, {
        'url': 'https://www.paramountplus.com/movies/sonic-the-hedgehog/5EKDXPOzdVf9voUqW6oRuocyAEeJGbEc',
        'info_dict': {
            'id': '5EKDXPOzdVf9voUqW6oRuocyAEeJGbEc',
            'ext': 'mp4',
            'uploader': 'CBSI-NEW',
            'description': 'md5:bc7b6fea84ba631ef77a9bda9f2ff911',
            'timestamp': 1577865600,
            'title': 'Sonic the Hedgehog',
            'upload_date': '20200101',
        },
        'params': {
            'skip_download': 'm3u8',
            'format': 'bestvideo',
        },
        'expected_warnings': ['Ignoring subtitle tracks'],
    }, {
        'url': 'https://www.paramountplus.com/shows/all-rise/video/QmR1WhNkh1a_IrdHZrbcRklm176X_rVc/all-rise-space/',
        'only_matching': True,
    }, {
        'url': 'https://www.paramountplus.com/movies/million-dollar-american-princesses-meghan-and-harry/C0LpgNwXYeB8txxycdWdR9TjxpJOsdCq',
        'only_matching': True,
    }]

    def _extract_video_info(self, content_id, mpx_acc=2198311517):
        """Look up *content_id* in the apps API and hand off to the common extraction.

        Every asset type listed for the content is requested with the same
        SMIL/MPEG4,M3U parameters; descriptive metadata is taken from the
        last entry of the returned item list.
        """
        api_url = 'https://www.paramountplus.com/apps-api/v2.0/androidtv/video/cid/%s.json' % content_id
        api_query = {
            'locale': 'en-us',
            'at': 'ABCqWNNSwhIqINWIIAG+DFzcFUvF8/vcN6cNyXFFfNzWAIvXuoVgX+fK4naOC7V8MLI=',
        }
        item_list = self._download_json(api_url, content_id, query=api_query)['itemList']

        # One fresh parameter dict per asset type (repeated types overwrite).
        asset_types = {}
        for entry in item_list:
            asset_types[entry.get('assetType')] = {
                'format': 'SMIL',
                'formats': 'MPEG4,M3U',
            }

        last_item = item_list[-1]
        extra_info = {
            'title': last_item.get('title'),
            'series': last_item.get('seriesTitle'),
            'season_number': int_or_none(last_item.get('seasonNum')),
            'episode_number': int_or_none(last_item.get('episodeNum')),
            'duration': int_or_none(last_item.get('duration')),
            'thumbnail': url_or_none(last_item.get('thumbnail')),
        }
        return self._extract_common_video_info(
            content_id, asset_types, mpx_acc, extra_info=extra_info)
|
||||
|
||||
|
||||
class ParamountPlusSeriesIE(InfoExtractor):
    """Playlist extractor for a whole Paramount+ show."""

    _VALID_URL = r'https?://(?:www\.)?paramountplus\.com/shows/(?P<id>[a-zA-Z0-9-_]+)/?(?:[#?]|$)'
    _TESTS = [{
        'url': 'https://www.paramountplus.com/shows/drake-josh',
        'playlist_mincount': 50,
        'info_dict': {
            'id': 'drake-josh',
        }
    }, {
        'url': 'https://www.paramountplus.com/shows/hawaii_five_0/',
        'playlist_mincount': 240,
        'info_dict': {
            'id': 'hawaii_five_0',
        }
    }, {
        'url': 'https://www.paramountplus.com/shows/spongebob-squarepants/',
        'playlist_mincount': 248,
        'info_dict': {
            'id': 'spongebob-squarepants',
        }
    }]
    _API_URL = 'https://www.paramountplus.com/shows/{}/xhr/episodes/page/0/size/100000/xs/0/season/0/'

    def _entries(self, show_name):
        """Yield one url_result per episode listed by the show's episode endpoint.

        Nothing is yielded when the response does not flag ``success``.
        """
        listing = self._download_json(self._API_URL.format(show_name), video_id=show_name)
        if not listing.get('success'):
            return
        for episode in listing['result']['data']:
            episode_url = 'https://www.paramountplus.com%s' % episode['url']
            yield self.url_result(
                episode_url,
                ie=ParamountPlusIE.ie_key(), video_id=episode['content_id'])

    def _real_extract(self, url):
        """Return the show's episodes as a playlist keyed by the show slug."""
        show_name = self._match_id(url)
        episodes = self._entries(show_name)
        return self.playlist_result(episodes, playlist_id=show_name)
|
||||
@@ -427,7 +427,7 @@ class PeerTubeIE(InfoExtractor):
|
||||
''' % (_INSTANCES_RE, _UUID_RE)
|
||||
_TESTS = [{
|
||||
'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d',
|
||||
'md5': '9bed8c0137913e17b86334e5885aacff',
|
||||
'md5': '8563064d245a4be5705bddb22bb00a28',
|
||||
'info_dict': {
|
||||
'id': '9c9de5e8-0a1e-484a-b099-e80766180a6d',
|
||||
'ext': 'mp4',
|
||||
@@ -570,7 +570,7 @@ class PeerTubeIE(InfoExtractor):
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = video.get('description')
|
||||
if len(description) >= 250:
|
||||
if description and len(description) >= 250:
|
||||
# description is shortened
|
||||
full_description = self._call_api(
|
||||
host, video_id, 'description', note='Downloading description JSON',
|
||||
|
||||
@@ -3,13 +3,15 @@ from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
RegexNotFoundError,
|
||||
strip_or_none,
|
||||
try_get
|
||||
)
|
||||
@@ -30,7 +32,7 @@ class RCTIPlusBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class RCTIPlusIE(RCTIPlusBaseIE):
|
||||
_VALID_URL = r'https://www\.rctiplus\.com/programs/\d+?/.*?/(?P<type>episode|clip|extra)/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
|
||||
_VALID_URL = r'https://www\.rctiplus\.com/(?:programs/\d+?/.*?/)?(?P<type>episode|clip|extra|live-event|missed-event)/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.rctiplus.com/programs/1259/kiko-untuk-lola/episode/22124/untuk-lola',
|
||||
'md5': '56ed45affad45fa18d5592a1bc199997',
|
||||
@@ -87,6 +89,52 @@ class RCTIPlusIE(RCTIPlusBaseIE):
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
}, { # Missed event/replay
|
||||
'url': 'https://www.rctiplus.com/missed-event/2507/mou-signing-ceremony-27-juli-2021-1400-wib',
|
||||
'md5': '649c5f27250faed1452ca8b91e06922d',
|
||||
'info_dict': {
|
||||
'id': 'v_pe2507',
|
||||
'title': 'MOU Signing Ceremony | 27 Juli 2021 | 14.00 WIB',
|
||||
'display_id': 'mou-signing-ceremony-27-juli-2021-1400-wib',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1627142400,
|
||||
'upload_date': '20210724',
|
||||
'was_live': True,
|
||||
'release_timestamp': 1627369200,
|
||||
},
|
||||
'params': {
|
||||
'fixup': 'never',
|
||||
},
|
||||
}, { # Live event; Cloudfront CDN
|
||||
'url': 'https://www.rctiplus.com/live-event/2530/dai-muda-charging-imun-dengan-iman-4-agustus-2021-1600-wib',
|
||||
'info_dict': {
|
||||
'id': 'v_le2530',
|
||||
'title': 'Dai Muda : Charging Imun dengan Iman | 4 Agustus 2021 | 16.00 WIB',
|
||||
'display_id': 'dai-muda-charging-imun-dengan-iman-4-agustus-2021-1600-wib',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1627898400,
|
||||
'upload_date': '20210802',
|
||||
'release_timestamp': 1628067600,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'This live event has ended.',
|
||||
}, { # TV; live_at is null
|
||||
'url': 'https://www.rctiplus.com/live-event/1/rcti',
|
||||
'info_dict': {
|
||||
'id': 'v_lt1',
|
||||
'title': 'RCTI',
|
||||
'display_id': 'rcti',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1546344000,
|
||||
'upload_date': '20190101',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
}]
|
||||
_CONVIVA_JSON_TEMPLATE = {
|
||||
't': 'CwsSessionHb',
|
||||
@@ -99,21 +147,22 @@ class RCTIPlusIE(RCTIPlusBaseIE):
|
||||
'sdk': True,
|
||||
}
|
||||
|
||||
def _search_auth_key(self, webpage):
    """Store the 'Authorization' value found in *webpage* on ``self._AUTH_KEY``.

    When the pattern is not found (``RegexNotFoundError``), the attribute is
    left untouched and no error is propagated.
    """
    try:
        found_key = self._search_regex(
            r'\'Authorization\':"(?P<auth>[^"]+)"', webpage, 'auth-key')
    except RegexNotFoundError:
        # Best effort: keep whatever key is already set.
        return
    self._AUTH_KEY = found_key
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_type, video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
self._search_auth_key(webpage)
|
||||
match = re.match(self._VALID_URL, url).groupdict()
|
||||
video_type, video_id, display_id = match['type'], match['id'], match['display_id']
|
||||
|
||||
url_api_version = 'v2' if video_type == 'missed-event' else 'v1'
|
||||
appier_id = '23984824_' + str(random.randint(0, 10000000000)) # Based on the webpage's uuidRandom generator
|
||||
video_json = self._call_api(
|
||||
'https://api.rctiplus.com/api/v1/%s/%s/url?appierid=.1' % (video_type, video_id), display_id, 'Downloading video URL JSON')[0]
|
||||
f'https://api.rctiplus.com/api/{url_api_version}/{video_type}/{video_id}/url?appierid={appier_id}', display_id, 'Downloading video URL JSON')[0]
|
||||
video_url = video_json['url']
|
||||
|
||||
is_upcoming = try_get(video_json, lambda x: x['current_date'] < x['live_at'])
|
||||
if is_upcoming is None:
|
||||
is_upcoming = try_get(video_json, lambda x: x['current_date'] < x['start_date'])
|
||||
if is_upcoming:
|
||||
self.raise_no_formats(
|
||||
'This event will start at %s.' % video_json['live_label'] if video_json.get('live_label') else 'This event has not started yet.', expected=True)
|
||||
if 'akamaized' in video_url:
|
||||
# For some videos hosted on Akamai's CDN (possibly AES-encrypted ones?), a session needs to at least be made via Conviva's API
|
||||
conviva_json_data = {
|
||||
@@ -142,22 +191,27 @@ class RCTIPlusIE(RCTIPlusBaseIE):
|
||||
'id': 'landscape_image',
|
||||
'url': '%s%d%s' % (image_path, 2000, video_meta['landscape_image'])
|
||||
})
|
||||
|
||||
formats = self._extract_m3u8_formats(video_url, display_id, 'mp4', headers={'Referer': 'https://www.rctiplus.com/'})
|
||||
try:
|
||||
formats = self._extract_m3u8_formats(video_url, display_id, 'mp4', headers={'Referer': 'https://www.rctiplus.com/'})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
self.raise_geo_restricted(countries=['ID'], metadata_available=True)
|
||||
else:
|
||||
raise e
|
||||
for f in formats:
|
||||
if 'akamaized' in f['url']:
|
||||
f.setdefault('http_headers', {})['Referer'] = 'https://www.rctiplus.com/' # Referer header is required for akamai CDNs
|
||||
if 'akamaized' in f['url'] or 'cloudfront' in f['url']:
|
||||
f.setdefault('http_headers', {})['Referer'] = 'https://www.rctiplus.com/' # Referer header is required for akamai/cloudfront CDNs
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_meta.get('product_id') or video_json.get('product_id'),
|
||||
'title': video_meta.get('title') or video_json.get('content_name'),
|
||||
'title': dict_get(video_meta, ('title', 'name')) or dict_get(video_json, ('content_name', 'assets_name')),
|
||||
'display_id': display_id,
|
||||
'description': video_meta.get('summary'),
|
||||
'timestamp': video_meta.get('release_date'),
|
||||
'timestamp': video_meta.get('release_date') or video_json.get('start_date'),
|
||||
'duration': video_meta.get('duration'),
|
||||
'categories': [video_meta.get('genre')],
|
||||
'categories': [video_meta['genre']] if video_meta.get('genre') else None,
|
||||
'average_rating': video_meta.get('star_rating'),
|
||||
'series': video_meta.get('program_title') or video_json.get('program_title'),
|
||||
'season_number': video_meta.get('season'),
|
||||
@@ -165,12 +219,16 @@ class RCTIPlusIE(RCTIPlusBaseIE):
|
||||
'channel': video_json.get('tv_name'),
|
||||
'channel_id': video_json.get('tv_id'),
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails
|
||||
'thumbnails': thumbnails,
|
||||
'is_live': video_type == 'live-event' and not is_upcoming,
|
||||
'was_live': video_type == 'missed-event',
|
||||
'live_status': 'is_upcoming' if is_upcoming else None,
|
||||
'release_timestamp': video_json.get('live_at'),
|
||||
}
|
||||
|
||||
|
||||
class RCTIPlusSeriesIE(RCTIPlusBaseIE):
|
||||
_VALID_URL = r'https://www\.rctiplus\.com/programs/(?P<id>\d+)/(?P<display_id>[^/?#&]+)(?:\W)*$'
|
||||
_VALID_URL = r'https://www\.rctiplus\.com/programs/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.rctiplus.com/programs/540/upin-ipin',
|
||||
'playlist_mincount': 417,
|
||||
@@ -180,7 +238,7 @@ class RCTIPlusSeriesIE(RCTIPlusBaseIE):
|
||||
'description': 'md5:22cc912381f389664416844e1ec4f86b',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.rctiplus.com/programs/540/upin-ipin/#',
|
||||
'url': 'https://www.rctiplus.com/programs/540/upin-ipin/episodes?utm_source=Rplusdweb&utm_medium=share_copy&utm_campaign=programsupin-ipin',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_AGE_RATINGS = { # Based off https://id.wikipedia.org/wiki/Sistem_rating_konten_televisi with additional ratings
|
||||
@@ -193,6 +251,10 @@ class RCTIPlusSeriesIE(RCTIPlusBaseIE):
|
||||
'D': 18,
|
||||
}
|
||||
|
||||
@classmethod
def suitable(cls, url):
    """Reject URLs that RCTIPlusIE accepts; otherwise use the inherited check."""
    if RCTIPlusIE.suitable(url):
        return False
    return super(RCTIPlusSeriesIE, cls).suitable(url)
|
||||
|
||||
def _entries(self, url, display_id=None, note='Downloading entries JSON', metadata={}):
|
||||
total_pages = 0
|
||||
try:
|
||||
@@ -253,3 +315,41 @@ class RCTIPlusSeriesIE(RCTIPlusBaseIE):
|
||||
display_id, 'Downloading extra entries', metadata))
|
||||
|
||||
return self.playlist_result(itertools.chain(*entries), series_id, series_meta.get('title'), series_meta.get('summary'), **metadata)
|
||||
|
||||
|
||||
class RCTIPlusTVIE(RCTIPlusBaseIE):
    """Resolve TV-channel pages and bare live-event/missed-event pages.

    The page is downloaded, the API video link embedded in it is located,
    and extraction is delegated to the 'RCTIPlus' extractor through a
    rebuilt site URL.
    """

    _VALID_URL = r'https://www\.rctiplus\.com/((tv/(?P<tvname>\w+))|(?P<eventname>live-event|missed-event))'
    _TESTS = [{
        'url': 'https://www.rctiplus.com/tv/rcti',
        'info_dict': {
            'id': 'v_lt1',
            'title': 'RCTI',
            'ext': 'mp4',
            'timestamp': 1546344000,
            'upload_date': '20190101',
        },
        'params': {
            'skip_download': True,
            'format': 'bestvideo',
        }
    }, {
        # Returned video will always change
        'url': 'https://www.rctiplus.com/live-event',
        'only_matching': True,
    }, {
        # Returned video will also always change
        'url': 'https://www.rctiplus.com/missed-event',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that RCTIPlusIE accepts; otherwise use the inherited check."""
        if RCTIPlusIE.suitable(url):
            return False
        return super(RCTIPlusTVIE, cls).suitable(url)

    def _real_extract(self, url):
        """Find the API video link in the page and hand off to 'RCTIPlus'."""
        url_groups = re.match(self._VALID_URL, url).groupdict()
        # Exactly one of the two named groups matches; use it as the id/slug.
        tv_id = url_groups.get('tvname') or url_groups.get('eventname')
        webpage = self._download_webpage(url, tv_id)

        api_link_re = r'url\s*:\s*["\']https://api\.rctiplus\.com/api/v./(?P<type>[^/]+)/(?P<id>\d+)/url'
        video_type, video_id = self._search_regex(
            api_link_re, webpage, 'video link', group=('type', 'id'))

        return self.url_result(f'https://www.rctiplus.com/{video_type}/{video_id}/{tv_id}', 'RCTIPlus')
|
||||
|
||||
@@ -144,7 +144,7 @@ class TurnerBaseIE(AdobePassIE):
|
||||
m3u8_id=format_id or 'hls', fatal=False)
|
||||
if '/secure/' in video_url and '?hdnea=' in video_url:
|
||||
for f in m3u8_formats:
|
||||
f['_seekable'] = False
|
||||
f['_ffmpeg_args'] = ['-seekable', '0']
|
||||
formats.extend(m3u8_formats)
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
|
||||
@@ -864,6 +864,7 @@ class TwitchClipsIE(TwitchBaseIE):
|
||||
'md5': '761769e1eafce0ffebfb4089cb3847cd',
|
||||
'info_dict': {
|
||||
'id': '42850523',
|
||||
'display_id': 'FaintLightGullWholeWheat',
|
||||
'ext': 'mp4',
|
||||
'title': 'EA Play 2016 Live from the Novo Theatre',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
@@ -976,6 +977,7 @@ class TwitchClipsIE(TwitchBaseIE):
|
||||
|
||||
return {
|
||||
'id': clip.get('id') or video_id,
|
||||
'display_id': video_id,
|
||||
'title': clip.get('title') or video_id,
|
||||
'formats': formats,
|
||||
'duration': int_or_none(clip.get('durationSeconds')),
|
||||
|
||||
@@ -73,7 +73,7 @@ class VikiBaseIE(InfoExtractor):
|
||||
data=json.dumps(data).encode('utf-8') if data else None,
|
||||
headers=({'x-viki-app-ver': self._APP_VERSION} if data
|
||||
else self._stream_headers(timestamp, sig) if query is None
|
||||
else None)) or {}
|
||||
else None), expected_status=400) or {}
|
||||
|
||||
self._raise_error(resp.get('error'), fatal)
|
||||
return resp
|
||||
|
||||
@@ -88,6 +88,7 @@ class VineIE(InfoExtractor):
|
||||
'format_id': format_id or 'standard',
|
||||
'quality': quality,
|
||||
})
|
||||
self._check_formats(formats, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
username = data.get('username')
|
||||
|
||||
@@ -19,6 +19,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
@@ -217,7 +218,7 @@ class VRVIE(VRVBaseIE):
|
||||
})
|
||||
|
||||
thumbnails = []
|
||||
for thumbnail in video_data.get('images', {}).get('thumbnails', []):
|
||||
for thumbnail in traverse_obj(video_data, ('images', 'thumbnail', ..., ...)):
|
||||
thumbnail_url = thumbnail.get('source')
|
||||
if not thumbnail_url:
|
||||
continue
|
||||
|
||||
@@ -1945,10 +1945,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
funcname = self._search_regex(
|
||||
(r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
|
||||
r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
|
||||
r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
|
||||
r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
|
||||
r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
|
||||
r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
|
||||
r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
|
||||
r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
|
||||
r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
|
||||
# Obsolete patterns
|
||||
r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
@@ -2455,7 +2455,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
yt_query.update(self._generate_player_context(sts))
|
||||
return self._extract_response(
|
||||
item_id=video_id, ep='player', query=yt_query,
|
||||
ytcfg=player_ytcfg, headers=headers, fatal=False,
|
||||
ytcfg=player_ytcfg, headers=headers, fatal=True,
|
||||
default_client=client,
|
||||
note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
|
||||
) or None
|
||||
@@ -2505,17 +2505,35 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
|
||||
clients.append(client_name)
|
||||
|
||||
# Android player_response does not have microFormats which are needed for
|
||||
# extraction of some data. So we return the initial_pr with formats
|
||||
# stripped out even if not requested by the user
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/501
|
||||
yielded_pr = False
|
||||
if initial_pr:
|
||||
pr = dict(initial_pr)
|
||||
pr['streamingData'] = None
|
||||
yielded_pr = True
|
||||
yield pr
|
||||
|
||||
last_error = None
|
||||
while clients:
|
||||
client = clients.pop()
|
||||
player_ytcfg = master_ytcfg if client == 'web' else {}
|
||||
if 'configs' not in self._configuration_arg('player_skip'):
|
||||
player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg
|
||||
|
||||
pr = (
|
||||
initial_pr if client == 'web' and initial_pr
|
||||
else self._extract_player_response(
|
||||
client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr))
|
||||
try:
|
||||
pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
|
||||
client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr)
|
||||
except ExtractorError as e:
|
||||
if last_error:
|
||||
self.report_warning(last_error)
|
||||
last_error = e
|
||||
continue
|
||||
|
||||
if pr:
|
||||
yielded_pr = True
|
||||
yield pr
|
||||
|
||||
# creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
|
||||
@@ -2524,13 +2542,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
elif self._is_agegated(pr):
|
||||
append_client(f'{client}_agegate')
|
||||
|
||||
# Android player_response does not have microFormats which are needed for
|
||||
# extraction of some data. So we return the initial_pr with formats
|
||||
# stripped out even if not requested by the user
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/501
|
||||
if initial_pr and 'web' not in original_clients:
|
||||
initial_pr['streamingData'] = None
|
||||
yield initial_pr
|
||||
if last_error:
|
||||
if not yielded_pr:
|
||||
raise last_error
|
||||
self.report_warning(last_error)
|
||||
|
||||
def _extract_formats(self, streaming_data, video_id, player_url, is_live):
|
||||
itags, stream_ids = [], []
|
||||
@@ -2628,7 +2643,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
yield dct
|
||||
|
||||
skip_manifests = self._configuration_arg('skip')
|
||||
get_dash = not is_live and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
|
||||
get_dash = (
|
||||
(not is_live or self._configuration_arg('include_live_dash'))
|
||||
and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
|
||||
get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
|
||||
|
||||
def guess_quality(f):
|
||||
@@ -3458,7 +3475,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
|
||||
'info_dict': {
|
||||
'id': 'FMtPN8yp5LU', # This will keep changing
|
||||
'id': '3yImotZU3tw', # This will keep changing
|
||||
'ext': 'mp4',
|
||||
'title': compat_str,
|
||||
'uploader': 'Sky News',
|
||||
|
||||
@@ -23,7 +23,7 @@ from .cookies import SUPPORTED_BROWSERS
|
||||
from .version import __version__
|
||||
|
||||
from .downloader.external import list_external_downloaders
|
||||
from .postprocessor.ffmpeg import (
|
||||
from .postprocessor import (
|
||||
FFmpegExtractAudioPP,
|
||||
FFmpegSubtitlesConvertorPP,
|
||||
FFmpegThumbnailsConvertorPP,
|
||||
@@ -190,15 +190,15 @@ def parseOpts(overrideArguments=None):
|
||||
general.add_option(
|
||||
'--dump-user-agent',
|
||||
action='store_true', dest='dump_user_agent', default=False,
|
||||
help='Display the current browser identification')
|
||||
help='Display the current user-agent and exit')
|
||||
general.add_option(
|
||||
'--list-extractors',
|
||||
action='store_true', dest='list_extractors', default=False,
|
||||
help='List all supported extractors')
|
||||
help='List all supported extractors and exit')
|
||||
general.add_option(
|
||||
'--extractor-descriptions',
|
||||
action='store_true', dest='list_extractor_descriptions', default=False,
|
||||
help='Output descriptions of all supported extractors')
|
||||
help='Output descriptions of all supported extractors and exit')
|
||||
general.add_option(
|
||||
'--force-generic-extractor',
|
||||
action='store_true', dest='force_generic_extractor', default=False,
|
||||
@@ -223,12 +223,6 @@ def parseOpts(overrideArguments=None):
|
||||
'--flat-playlist',
|
||||
action='store_const', dest='extract_flat', const='in_playlist', default=False,
|
||||
help='Do not extract the videos of a playlist, only list them')
|
||||
general.add_option(
|
||||
'--flat-videos',
|
||||
action='store_true', dest='extract_flat',
|
||||
# help='Do not resolve the video urls')
|
||||
# doesn't work
|
||||
help=optparse.SUPPRESS_HELP)
|
||||
general.add_option(
|
||||
'--no-flat-playlist',
|
||||
action='store_false', dest='extract_flat',
|
||||
@@ -375,22 +369,17 @@ def parseOpts(overrideArguments=None):
|
||||
'--match-filter',
|
||||
metavar='FILTER', dest='match_filter', default=None,
|
||||
help=(
|
||||
'Generic video filter. '
|
||||
'Specify any key (see "OUTPUT TEMPLATE" for a list of available keys) to '
|
||||
'match if the key is present, '
|
||||
'!key to check if the key is not present, '
|
||||
'key>NUMBER (like "view_count > 12", also works with '
|
||||
'>=, <, <=, !=, =) to compare against a number, '
|
||||
'key = \'LITERAL\' (like "uploader = \'Mike Smith\'", also works with !=) '
|
||||
'to match against a string literal '
|
||||
'and & to require multiple matches. '
|
||||
'Values which are not known are excluded unless you '
|
||||
'put a question mark (?) after the operator. '
|
||||
'For example, to only match videos that have been liked more than '
|
||||
'100 times and disliked less than 50 times (or the dislike '
|
||||
'functionality is not available at the given service), but who '
|
||||
'also have a description, use --match-filter '
|
||||
'"like_count > 100 & dislike_count <? 50 & description"'))
|
||||
'Generic video filter. Any field (see "OUTPUT TEMPLATE") can be compared with a '
|
||||
'number or a string using the operators defined in "Filtering formats". '
|
||||
'You can also simply specify a field to match if the field is present '
|
||||
'and "!field" to check if the field is not present. In addition, '
|
||||
'Python style regular expression matching can be done using "~=", '
|
||||
'and multiple filters can be checked with "&". '
|
||||
'Use a "\\" to escape "&" or quotes if needed. Eg: --match-filter '
|
||||
r'"!is_live & like_count>?100 & description~=\'(?i)\bcats \& dogs\b\'" '
|
||||
'matches only videos that are not live, has a like count more than 100 '
|
||||
'(or the like field is not available), and also has a description '
|
||||
'that contains the phrase "cats & dogs" (ignoring case)'))
|
||||
selection.add_option(
|
||||
'--no-match-filter',
|
||||
metavar='FILTER', dest='match_filter', action='store_const', const=None,
|
||||
@@ -537,7 +526,7 @@ def parseOpts(overrideArguments=None):
|
||||
video_format.add_option(
|
||||
'-F', '--list-formats',
|
||||
action='store_true', dest='listformats',
|
||||
help='List all available formats of requested videos')
|
||||
help='List available formats of each video. Simulate unless --no-simulate is used')
|
||||
video_format.add_option(
|
||||
'--list-formats-as-table',
|
||||
action='store_true', dest='listformats_table', default=True,
|
||||
@@ -588,7 +577,7 @@ def parseOpts(overrideArguments=None):
|
||||
subtitles.add_option(
|
||||
'--list-subs',
|
||||
action='store_true', dest='listsubtitles', default=False,
|
||||
help='List all available subtitles for the video')
|
||||
help='List available subtitles of each video. Simulate unless --no-simulate is used')
|
||||
subtitles.add_option(
|
||||
'--sub-format',
|
||||
action='store', dest='subtitlesformat', metavar='FORMAT', default='best',
|
||||
@@ -786,21 +775,25 @@ def parseOpts(overrideArguments=None):
|
||||
verbosity.add_option(
|
||||
'-q', '--quiet',
|
||||
action='store_true', dest='quiet', default=False,
|
||||
help='Activate quiet mode')
|
||||
help='Activate quiet mode. If used with --verbose, print the log to stderr')
|
||||
verbosity.add_option(
|
||||
'--no-warnings',
|
||||
dest='no_warnings', action='store_true', default=False,
|
||||
help='Ignore warnings')
|
||||
verbosity.add_option(
|
||||
'-s', '--simulate',
|
||||
action='store_true', dest='simulate', default=False,
|
||||
action='store_true', dest='simulate', default=None,
|
||||
help='Do not download the video and do not write anything to disk')
|
||||
verbosity.add_option(
|
||||
'--no-simulate',
|
||||
action='store_false', dest='simulate',
|
||||
help='Download the video even if printing/listing options are used')
|
||||
verbosity.add_option(
|
||||
'--ignore-no-formats-error',
|
||||
action='store_true', dest='ignore_no_formats_error', default=False,
|
||||
help=(
|
||||
'Ignore "No video formats" error. Usefull for extracting metadata '
|
||||
'even if the video is not actually available for download (experimental)'))
|
||||
'even if the videos are not actually available for download (experimental)'))
|
||||
verbosity.add_option(
|
||||
'--no-ignore-no-formats-error',
|
||||
action='store_false', dest='ignore_no_formats_error',
|
||||
@@ -810,12 +803,11 @@ def parseOpts(overrideArguments=None):
|
||||
action='store_true', dest='skip_download', default=False,
|
||||
help='Do not download the video but write all related files (Alias: --no-download)')
|
||||
verbosity.add_option(
|
||||
'-O', '--print', metavar='TEMPLATE',
|
||||
action='callback', dest='forceprint', type='str', default=[],
|
||||
callback=_list_from_options_callback, callback_kwargs={'delim': None},
|
||||
'-O', '--print',
|
||||
metavar='TEMPLATE', action='append', dest='forceprint',
|
||||
help=(
|
||||
'Simulate, quiet but print the given fields. Either a field name '
|
||||
'or similar formatting as the output template can be used'))
|
||||
'Quiet, but print the given fields for each video. Simulate unless --no-simulate is used. '
|
||||
'Either a field name or same syntax as the output template can be used'))
|
||||
verbosity.add_option(
|
||||
'-g', '--get-url',
|
||||
action='store_true', dest='geturl', default=False,
|
||||
@@ -851,17 +843,17 @@ def parseOpts(overrideArguments=None):
|
||||
verbosity.add_option(
|
||||
'-j', '--dump-json',
|
||||
action='store_true', dest='dumpjson', default=False,
|
||||
help='Simulate, quiet but print JSON information. See "OUTPUT TEMPLATE" for a description of available keys')
|
||||
help='Quiet, but print JSON information for each video. Simulate unless --no-simulate is used. See "OUTPUT TEMPLATE" for a description of available keys')
|
||||
verbosity.add_option(
|
||||
'-J', '--dump-single-json',
|
||||
action='store_true', dest='dump_single_json', default=False,
|
||||
help=(
|
||||
'Simulate, quiet but print JSON information for each command-line argument. '
|
||||
'If the URL refers to a playlist, dump the whole playlist information in a single line'))
|
||||
'Quiet, but print JSON information for each url or infojson passed. Simulate unless --no-simulate is used. '
|
||||
'If the URL refers to a playlist, the whole playlist information is dumped in a single line'))
|
||||
verbosity.add_option(
|
||||
'--print-json',
|
||||
action='store_true', dest='print_json', default=False,
|
||||
help='Be quiet and print the video information as JSON (video is still being downloaded)')
|
||||
help=optparse.SUPPRESS_HELP)
|
||||
verbosity.add_option(
|
||||
'--force-write-archive', '--force-write-download-archive', '--force-download-archive',
|
||||
action='store_true', dest='force_write_download_archive', default=False,
|
||||
@@ -1132,7 +1124,7 @@ def parseOpts(overrideArguments=None):
|
||||
thumbnail.add_option(
|
||||
'--list-thumbnails',
|
||||
action='store_true', dest='list_thumbnails', default=False,
|
||||
help='Simulate and list all available thumbnail formats')
|
||||
help='List available thumbnails of each video. Simulate unless --no-simulate is used')
|
||||
|
||||
link = optparse.OptionGroup(parser, 'Internet Shortcut Options')
|
||||
link.add_option(
|
||||
@@ -1248,10 +1240,14 @@ def parseOpts(overrideArguments=None):
|
||||
help=optparse.SUPPRESS_HELP)
|
||||
postproc.add_option(
|
||||
'--parse-metadata',
|
||||
metavar='FROM:TO', dest='metafromfield', action='append',
|
||||
metavar='FROM:TO', dest='parse_metadata', action='append',
|
||||
help=(
|
||||
'Parse additional metadata like title/artist from other fields; '
|
||||
'see "MODIFYING METADATA" for details'))
|
||||
postproc.add_option(
|
||||
'--replace-in-metadata',
|
||||
dest='parse_metadata', metavar='FIELDS REGEX REPLACE', action='append', nargs=3,
|
||||
help='Replace text in a metadata field using the given regex. This option can be used multiple times')
|
||||
postproc.add_option(
|
||||
'--xattrs',
|
||||
action='store_true', dest='xattrs', default=False,
|
||||
@@ -1278,17 +1274,29 @@ def parseOpts(overrideArguments=None):
|
||||
dest='ffmpeg_location',
|
||||
help='Location of the ffmpeg binary; either the path to the binary or its containing directory')
|
||||
postproc.add_option(
|
||||
'--exec',
|
||||
metavar='CMD', dest='exec_cmd',
|
||||
'--exec', metavar='CMD',
|
||||
action='append', dest='exec_cmd',
|
||||
help=(
|
||||
'Execute a command on the file after downloading and post-processing. '
|
||||
'Similar syntax to the output template can be used to pass any field as arguments to the command. '
|
||||
'Same syntax as the output template can be used to pass any field as arguments to the command. '
|
||||
'An additional field "filepath" that contains the final path of the downloaded file is also available. '
|
||||
'If no fields are passed, %(filepath)q is appended to the end of the command'))
|
||||
'If no fields are passed, %(filepath)q is appended to the end of the command. '
|
||||
'This option can be used multiple times'))
|
||||
postproc.add_option(
|
||||
'--exec-before-download',
|
||||
metavar='CMD', dest='exec_before_dl_cmd',
|
||||
help='Execute a command before the actual download. The syntax is the same as --exec but "filepath" is not available')
|
||||
'--no-exec',
|
||||
action='store_const', dest='exec_cmd', const=[],
|
||||
help='Remove any previously defined --exec')
|
||||
postproc.add_option(
|
||||
'--exec-before-download', metavar='CMD',
|
||||
action='append', dest='exec_before_dl_cmd',
|
||||
help=(
|
||||
'Execute a command before the actual download. '
|
||||
'The syntax is the same as --exec but "filepath" is not available. '
|
||||
'This option can be used multiple times'))
|
||||
postproc.add_option(
|
||||
'--no-exec-before-download',
|
||||
action='store_const', dest='exec_before_dl_cmd', const=[],
|
||||
help='Remove any previously defined --exec-before-download')
|
||||
postproc.add_option(
|
||||
'--convert-subs', '--convert-sub', '--convert-subtitles',
|
||||
metavar='FORMAT', dest='convertsubtitles', default=None,
|
||||
@@ -1372,7 +1380,7 @@ def parseOpts(overrideArguments=None):
|
||||
'--no-hls-split-discontinuity',
|
||||
dest='hls_split_discontinuity', action='store_false',
|
||||
help='Do not split HLS playlists to different formats at discontinuities such as ad breaks (default)')
|
||||
_extractor_arg_parser = lambda key, vals='': (key.strip().lower(), [val.strip() for val in vals.split(',')])
|
||||
_extractor_arg_parser = lambda key, vals='': (key.strip().lower().replace('-', '_'), [val.strip() for val in vals.split(',')])
|
||||
extractor.add_option(
|
||||
'--extractor-args',
|
||||
metavar='KEY:ARGS', dest='extractor_args', default={}, type='str',
|
||||
|
||||
@@ -19,9 +19,12 @@ from .ffmpeg import (
|
||||
FFmpegVideoRemuxerPP,
|
||||
)
|
||||
from .xattrpp import XAttrMetadataPP
|
||||
from .execafterdownload import ExecAfterDownloadPP
|
||||
from .metadatafromfield import MetadataFromFieldPP
|
||||
from .metadatafromfield import MetadataFromTitlePP
|
||||
from .exec import ExecPP, ExecAfterDownloadPP
|
||||
from .metadataparser import (
|
||||
MetadataFromFieldPP,
|
||||
MetadataFromTitlePP,
|
||||
MetadataParserPP,
|
||||
)
|
||||
from .movefilesafterdownload import MoveFilesAfterDownloadPP
|
||||
from .sponskrub import SponSkrubPP
|
||||
|
||||
@@ -33,6 +36,7 @@ def get_postprocessor(key):
|
||||
__all__ = [
|
||||
'FFmpegPostProcessor',
|
||||
'EmbedThumbnailPP',
|
||||
'ExecPP',
|
||||
'ExecAfterDownloadPP',
|
||||
'FFmpegEmbedSubtitlePP',
|
||||
'FFmpegExtractAudioPP',
|
||||
@@ -48,6 +52,7 @@ __all__ = [
|
||||
'FFmpegThumbnailsConvertorPP',
|
||||
'FFmpegVideoConvertorPP',
|
||||
'FFmpegVideoRemuxerPP',
|
||||
'MetadataParserPP',
|
||||
'MetadataFromFieldPP',
|
||||
'MetadataFromTitlePP',
|
||||
'MoveFilesAfterDownloadPP',
|
||||
|
||||
@@ -7,18 +7,15 @@ from ..compat import compat_shlex_quote
|
||||
from ..utils import (
|
||||
encodeArgument,
|
||||
PostProcessingError,
|
||||
variadic,
|
||||
)
|
||||
|
||||
|
||||
class ExecAfterDownloadPP(PostProcessor):
|
||||
class ExecPP(PostProcessor):
|
||||
|
||||
def __init__(self, downloader, exec_cmd):
|
||||
super(ExecAfterDownloadPP, self).__init__(downloader)
|
||||
self.exec_cmd = exec_cmd
|
||||
|
||||
@classmethod
|
||||
def pp_key(cls):
|
||||
return 'Exec'
|
||||
PostProcessor.__init__(self, downloader)
|
||||
self.exec_cmd = variadic(exec_cmd)
|
||||
|
||||
def parse_cmd(self, cmd, info):
|
||||
tmpl, tmpl_dict = self._downloader.prepare_outtmpl(cmd, info)
|
||||
@@ -32,9 +29,14 @@ class ExecAfterDownloadPP(PostProcessor):
|
||||
info.get('filepath') or info['_filename']))
|
||||
|
||||
def run(self, info):
|
||||
cmd = self.parse_cmd(self.exec_cmd, info)
|
||||
self.to_screen('Executing command: %s' % cmd)
|
||||
retCode = subprocess.call(encodeArgument(cmd), shell=True)
|
||||
if retCode != 0:
|
||||
raise PostProcessingError('Command returned error code %d' % retCode)
|
||||
for tmpl in self.exec_cmd:
|
||||
cmd = self.parse_cmd(tmpl, info)
|
||||
self.to_screen('Executing command: %s' % cmd)
|
||||
retCode = subprocess.call(encodeArgument(cmd), shell=True)
|
||||
if retCode != 0:
|
||||
raise PostProcessingError('Command returned error code %d' % retCode)
|
||||
return [], info
|
||||
|
||||
|
||||
class ExecAfterDownloadPP(ExecPP):  # for backward compatibility
    """Alias of ExecPP kept under its old name; adds no behavior of its own."""
|
||||
@@ -109,21 +109,19 @@ class FFmpegPostProcessor(PostProcessor):
|
||||
'Continuing without ffmpeg.' % (location))
|
||||
self._versions = {}
|
||||
return
|
||||
elif not os.path.isdir(location):
|
||||
elif os.path.isdir(location):
|
||||
dirname, basename = location, None
|
||||
else:
|
||||
basename = os.path.splitext(os.path.basename(location))[0]
|
||||
if basename not in programs:
|
||||
self.report_warning(
|
||||
'Cannot identify executable %s, its basename should be one of %s. '
|
||||
'Continuing without ffmpeg.' %
|
||||
(location, ', '.join(programs)))
|
||||
self._versions = {}
|
||||
return None
|
||||
location = os.path.dirname(os.path.abspath(location))
|
||||
basename = next((p for p in programs if basename.startswith(p)), 'ffmpeg')
|
||||
dirname = os.path.dirname(os.path.abspath(location))
|
||||
if basename in ('ffmpeg', 'ffprobe'):
|
||||
prefer_ffmpeg = True
|
||||
|
||||
self._paths = dict(
|
||||
(p, os.path.join(location, p)) for p in programs)
|
||||
(p, os.path.join(dirname, p)) for p in programs)
|
||||
if basename:
|
||||
self._paths[basename] = location
|
||||
self._versions = dict(
|
||||
(p, get_ffmpeg_version(self._paths[p])) for p in programs)
|
||||
if self._versions is None:
|
||||
|
||||
@@ -1,74 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import PostProcessor
|
||||
from ..compat import compat_str
|
||||
|
||||
|
||||
class MetadataFromFieldPP(PostProcessor):
    """Post-processor that fills info-dict fields by parsing other fields.

    Each entry of ``formats`` is a ``FROM:TO`` string. FROM is a field name
    or an output template, TO is either a regex or a ``%(name)s`` style
    format that is converted into a regex with named groups.
    """

    # Splits 'FROM:TO' on the first ':' that is not escaped with a backslash
    regex = r'(?P<in>.*?)(?<!\\):(?P<out>.+)$'

    def __init__(self, downloader, formats):
        PostProcessor.__init__(self, downloader)
        assert isinstance(formats, (list, tuple))
        self._data = []
        for spec in formats:
            assert isinstance(spec, compat_str)
            parsed = re.match(self.regex, spec)
            assert parsed is not None
            # '\:' in the FROM part stands for a literal colon
            source = parsed.group('in').replace('\\:', ':')
            target = parsed.group('out')
            entry = {
                'in': source,
                'out': target,
                'tmpl': self.field_to_template(source),
                'regex': self.format_to_regex(target),
            }
            self._data.append(entry)

    @staticmethod
    def field_to_template(tmpl):
        """Wrap a bare field name as '%(name)s'; return anything else as-is."""
        if not re.match(r'[a-zA-Z_]+$', tmpl):
            return tmpl
        return '%%(%s)s' % tmpl

    @staticmethod
    def format_to_regex(fmt):
        r"""
        Converts a string like
           '%(title)s - %(artist)s'
        to a regex like
           '(?P<title>.+)\ \-\ (?P<artist>.+)'
        """
        # Without any %(..)s placeholder the input is taken to be a regex already
        if not re.search(r'%\(\w+\)s', fmt):
            return fmt
        pieces = []
        cursor = 0
        # each %(..)s becomes a named group, the text in between is escaped
        for placeholder in re.finditer(r'%\((\w+)\)s', fmt):
            pieces.append(re.escape(fmt[cursor:placeholder.start()]))
            pieces.append(r'(?P<%s>.+)' % placeholder.group(1))
            cursor = placeholder.end()
        if cursor < len(fmt):
            pieces.append(re.escape(fmt[cursor:]))
        return ''.join(pieces)

    def run(self, info):
        """Apply every configured FROM:TO rule to ``info`` in order."""
        for entry in self._data:
            template = entry['tmpl']
            pattern = entry['regex']
            outtmpl, tmpl_dict = self._downloader.prepare_outtmpl(template, info)
            data_to_parse = self._downloader.escape_outtmpl(outtmpl) % tmpl_dict
            self.write_debug('Searching for r"%s" in %s' % (pattern, template))
            found = re.search(pattern, data_to_parse)
            if found is None:
                self.report_warning('Could not interpret video %s as "%s"' % (entry['in'], entry['out']))
                continue
            # every named group of the match overwrites the same-named field
            for attribute, value in found.groupdict().items():
                info[attribute] = value
                shown = 'NA' if value is None else value
                self.to_screen('parsed %s from "%s": %s' % (attribute, template, shown))
        return [], info
|
||||
|
||||
|
||||
class MetadataFromTitlePP(MetadataFromFieldPP):  # for backward compatibility
    """Old-style wrapper: parse the title using a single TO format."""

    def __init__(self, downloader, titleformat):
        # Expressed as one FROM:TO rule whose FROM is the title template
        title_rule = '%%(title)s:%s' % titleformat
        super(MetadataFromTitlePP, self).__init__(downloader, [title_rule])
        self._titleformat = titleformat
        self._titleregex = self._data[0]['regex']
|
||||
117
yt_dlp/postprocessor/metadataparser.py
Normal file
117
yt_dlp/postprocessor/metadataparser.py
Normal file
@@ -0,0 +1,117 @@
|
||||
import re
|
||||
|
||||
from enum import Enum
|
||||
|
||||
from .common import PostProcessor
|
||||
|
||||
|
||||
class MetadataParserPP(PostProcessor):
    """Post-processor that rewrites info-dict fields.

    It is configured with a list of actions, each a tuple whose first item is
    a member of ``Actions`` and whose remaining items are the arguments of the
    factory method named by that member's value.
    """

    class Actions(Enum):
        # The values are the names of the factory methods below
        INTERPRET = 'interpretter'
        REPLACE = 'replacer'

    def __init__(self, downloader, actions):
        PostProcessor.__init__(self, downloader)
        self._actions = []
        for f in actions:
            action = f[0]
            assert isinstance(action, self.Actions)
            # Build the callable once here; run() only has to invoke it
            self._actions.append(getattr(self, action._value_)(*f[1:]))

    @classmethod
    def validate_action(cls, action, *data):
        ''' Each action can be:
                (Actions.INTERPRET, from, to) OR
                (Actions.REPLACE, field, search, replace)

        Raises ValueError if action is not a member of Actions. The matching
        factory is then called (with the class standing in for the instance)
        so that any error it raises for the given data surfaces here.
        '''
        if not isinstance(action, cls.Actions):
            raise ValueError(f'{action!r} is not a valid action')
        getattr(cls, action._value_)(cls, *data)

    @staticmethod
    def field_to_template(tmpl):
        """Wrap a bare field name as '%(name)s'; return anything else as-is."""
        if re.match(r'[a-zA-Z_]+$', tmpl):
            return f'%({tmpl})s'
        return tmpl

    @staticmethod
    def format_to_regex(fmt):
        r"""
        Converts a string like
           '%(title)s - %(artist)s'
        to a regex like
           '(?P<title>.+)\ \-\ (?P<artist>.+)'
        """
        # Without any %(..)s placeholder the input is taken to be a regex already
        if not re.search(r'%\(\w+\)s', fmt):
            return fmt
        lastpos = 0
        regex = ''
        # replace %(..)s with regex group and escape other string parts
        for match in re.finditer(r'%\((\w+)\)s', fmt):
            regex += re.escape(fmt[lastpos:match.start()])
            regex += rf'(?P<{match.group(1)}>.+)'
            lastpos = match.end()
        if lastpos < len(fmt):
            regex += re.escape(fmt[lastpos:])
        return regex

    def run(self, info):
        """Apply all configured actions to ``info`` in order."""
        for f in self._actions:
            f(info)
        return [], info

    def interpretter(self, inp, out):
        """Return a callable that parses ``inp`` according to ``out``.

        ``inp`` is a field name or output template, ``out`` is a regex or a
        '%(name)s' style format. Every named group of the match is written
        into the info dict.
        """
        def f(info):
            outtmpl, tmpl_dict = self._downloader.prepare_outtmpl(template, info)
            data_to_parse = self._downloader.escape_outtmpl(outtmpl) % tmpl_dict
            self.write_debug(f'Searching for r{out_re.pattern!r} in {template!r}')
            match = out_re.search(data_to_parse)
            if match is None:
                # This must be an f-string, otherwise the placeholders are
                # printed literally instead of the actual values
                self.report_warning(f'Could not interpret {inp!r} as {out!r}')
                return
            for attribute, value in match.groupdict().items():
                info[attribute] = value
                self.to_screen('Parsed %s from %r: %r' % (attribute, template, value if value is not None else 'NA'))

        template = self.field_to_template(inp)
        out_re = re.compile(self.format_to_regex(out))
        return f

    def replacer(self, field, search, replace):
        """Return a callable that regex-replaces ``search`` with ``replace``
        in the string field ``field`` of the info dict.
        """
        def f(info):
            val = info.get(field)
            if val is None:
                self.report_warning(f'Video does not have a {field}')
                return
            elif not isinstance(val, str):
                self.report_warning(f'Cannot replace in field {field} since it is a {type(val).__name__}')
                return
            self.write_debug(f'Replacing all r{search!r} in {field} with {replace!r}')
            info[field], n = search_re.subn(replace, val)
            if n:
                self.to_screen(f'Changed {field} to: {info[field]}')
            else:
                self.to_screen(f'Did not find r{search!r} in {field}')

        search_re = re.compile(search)
        return f
|
||||
|
||||
|
||||
class MetadataFromFieldPP(MetadataParserPP):
    """Accepts the 'FROM:TO' string form and turns each string into an
    INTERPRET action.
    """

    @classmethod
    def to_action(cls, f):
        """Convert one 'FROM:TO' string into an (INTERPRET, from, to) tuple.

        Raises ValueError if ``f`` contains no unescaped ':' separator.
        """
        parsed = re.match(r'(?P<in>.*?)(?<!\\):(?P<out>.+)$', f)
        if parsed is None:
            raise ValueError(f'it should be FROM:TO, not {f!r}')
        kind = cls.Actions.INTERPRET
        # '\:' in the FROM part stands for a literal colon
        source = parsed.group('in').replace('\\:', ':')
        target = parsed.group('out')
        return (kind, source, target)

    def __init__(self, downloader, formats):
        interpret_actions = [self.to_action(spec) for spec in formats]
        MetadataParserPP.__init__(self, downloader, interpret_actions)
|
||||
|
||||
|
||||
class MetadataFromTitlePP(MetadataParserPP):  # for backward compatibility
    """Old-style wrapper: a single INTERPRET action on the 'title' field."""

    def __init__(self, downloader, titleformat):
        title_action = (self.Actions.INTERPRET, 'title', titleformat)
        MetadataParserPP.__init__(self, downloader, [title_action])
|
||||
120
yt_dlp/utils.py
120
yt_dlp/utils.py
@@ -1836,7 +1836,7 @@ def write_json_file(obj, fn):
|
||||
|
||||
try:
|
||||
with tf:
|
||||
json.dump(obj, tf, default=repr)
|
||||
json.dump(obj, tf)
|
||||
if sys.platform == 'win32':
|
||||
# Need to remove existing file on Windows, else os.rename raises
|
||||
# WindowsError or FileExistsError.
|
||||
@@ -4041,15 +4041,31 @@ class LazyList(collections.abc.Sequence):
|
||||
return repr(self.exhaust())
|
||||
|
||||
|
||||
class PagedList(object):
|
||||
class PagedList:
|
||||
def __len__(self):
|
||||
# This is only useful for tests
|
||||
return len(self.getslice())
|
||||
|
||||
def getslice(self, start, end):
|
||||
def __init__(self, pagefunc, pagesize, use_cache=True):
|
||||
self._pagefunc = pagefunc
|
||||
self._pagesize = pagesize
|
||||
self._use_cache = use_cache
|
||||
self._cache = {}
|
||||
|
||||
def getpage(self, pagenum):
|
||||
page_results = self._cache.get(pagenum) or list(self._pagefunc(pagenum))
|
||||
if self._use_cache:
|
||||
self._cache[pagenum] = page_results
|
||||
return page_results
|
||||
|
||||
def getslice(self, start=0, end=None):
|
||||
return list(self._getslice(start, end))
|
||||
|
||||
def _getslice(self, start, end):
|
||||
raise NotImplementedError('This method must be implemented by subclasses')
|
||||
|
||||
def __getitem__(self, idx):
|
||||
# NOTE: cache must be enabled if this is used
|
||||
if not isinstance(idx, int) or idx < 0:
|
||||
raise TypeError('indices must be non-negative integers')
|
||||
entries = self.getslice(idx, idx + 1)
|
||||
@@ -4057,42 +4073,26 @@ class PagedList(object):
|
||||
|
||||
|
||||
class OnDemandPagedList(PagedList):
|
||||
def __init__(self, pagefunc, pagesize, use_cache=True):
|
||||
self._pagefunc = pagefunc
|
||||
self._pagesize = pagesize
|
||||
self._use_cache = use_cache
|
||||
if use_cache:
|
||||
self._cache = {}
|
||||
|
||||
def getslice(self, start=0, end=None):
|
||||
res = []
|
||||
def _getslice(self, start, end):
|
||||
for pagenum in itertools.count(start // self._pagesize):
|
||||
firstid = pagenum * self._pagesize
|
||||
nextfirstid = pagenum * self._pagesize + self._pagesize
|
||||
if start >= nextfirstid:
|
||||
continue
|
||||
|
||||
page_results = None
|
||||
if self._use_cache:
|
||||
page_results = self._cache.get(pagenum)
|
||||
if page_results is None:
|
||||
page_results = list(self._pagefunc(pagenum))
|
||||
if self._use_cache:
|
||||
self._cache[pagenum] = page_results
|
||||
|
||||
startv = (
|
||||
start % self._pagesize
|
||||
if firstid <= start < nextfirstid
|
||||
else 0)
|
||||
|
||||
endv = (
|
||||
((end - 1) % self._pagesize) + 1
|
||||
if (end is not None and firstid <= end <= nextfirstid)
|
||||
else None)
|
||||
|
||||
page_results = self.getpage(pagenum)
|
||||
if startv != 0 or endv is not None:
|
||||
page_results = page_results[startv:endv]
|
||||
res.extend(page_results)
|
||||
yield from page_results
|
||||
|
||||
# A little optimization - if current page is not "full", ie. does
|
||||
# not contain page_size videos then we can assume that this page
|
||||
@@ -4105,36 +4105,31 @@ class OnDemandPagedList(PagedList):
|
||||
# break out early as well
|
||||
if end == nextfirstid:
|
||||
break
|
||||
return res
|
||||
|
||||
|
||||
class InAdvancePagedList(PagedList):
|
||||
def __init__(self, pagefunc, pagecount, pagesize):
|
||||
self._pagefunc = pagefunc
|
||||
self._pagecount = pagecount
|
||||
self._pagesize = pagesize
|
||||
PagedList.__init__(self, pagefunc, pagesize, True)
|
||||
|
||||
def getslice(self, start=0, end=None):
|
||||
res = []
|
||||
def _getslice(self, start, end):
|
||||
start_page = start // self._pagesize
|
||||
end_page = (
|
||||
self._pagecount if end is None else (end // self._pagesize + 1))
|
||||
skip_elems = start - start_page * self._pagesize
|
||||
only_more = None if end is None else end - start
|
||||
for pagenum in range(start_page, end_page):
|
||||
page = list(self._pagefunc(pagenum))
|
||||
page_results = self.getpage(pagenum)
|
||||
if skip_elems:
|
||||
page = page[skip_elems:]
|
||||
page_results = page_results[skip_elems:]
|
||||
skip_elems = None
|
||||
if only_more is not None:
|
||||
if len(page) < only_more:
|
||||
only_more -= len(page)
|
||||
if len(page_results) < only_more:
|
||||
only_more -= len(page_results)
|
||||
else:
|
||||
page = page[:only_more]
|
||||
res.extend(page)
|
||||
yield from page_results[:only_more]
|
||||
break
|
||||
res.extend(page)
|
||||
return res
|
||||
yield from page_results
|
||||
|
||||
|
||||
def uppercase_escape(s):
|
||||
@@ -4663,27 +4658,39 @@ def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
|
||||
|
||||
|
||||
def _match_one(filter_part, dct):
|
||||
COMPARISON_OPERATORS = {
|
||||
'<': operator.lt,
|
||||
'<=': operator.le,
|
||||
'>': operator.gt,
|
||||
'>=': operator.ge,
|
||||
'=': operator.eq,
|
||||
'!=': operator.ne,
|
||||
# TODO: Generalize code with YoutubeDL._build_format_filter
|
||||
STRING_OPERATORS = {
|
||||
'*=': operator.contains,
|
||||
'^=': lambda attr, value: attr.startswith(value),
|
||||
'$=': lambda attr, value: attr.endswith(value),
|
||||
'~=': lambda attr, value: re.search(value, attr),
|
||||
}
|
||||
COMPARISON_OPERATORS = {
|
||||
**STRING_OPERATORS,
|
||||
'<=': operator.le, # "<=" must be defined above "<"
|
||||
'<': operator.lt,
|
||||
'>=': operator.ge,
|
||||
'>': operator.gt,
|
||||
'=': operator.eq,
|
||||
}
|
||||
|
||||
operator_rex = re.compile(r'''(?x)\s*
|
||||
(?P<key>[a-z_]+)
|
||||
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
|
||||
\s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
|
||||
(?:
|
||||
(?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
|
||||
(?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
|
||||
(?P<strval>(?![0-9.])[a-z0-9A-Z]*)
|
||||
(?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
|
||||
(?P<strval>.+?)
|
||||
)
|
||||
\s*$
|
||||
''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
|
||||
m = operator_rex.search(filter_part)
|
||||
if m:
|
||||
op = COMPARISON_OPERATORS[m.group('op')]
|
||||
unnegated_op = COMPARISON_OPERATORS[m.group('op')]
|
||||
if m.group('negation'):
|
||||
op = lambda attr, value: not unnegated_op(attr, value)
|
||||
else:
|
||||
op = unnegated_op
|
||||
actual_value = dct.get(m.group('key'))
|
||||
if (m.group('quotedstrval') is not None
|
||||
or m.group('strval') is not None
|
||||
@@ -4693,14 +4700,13 @@ def _match_one(filter_part, dct):
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/11082).
|
||||
or actual_value is not None and m.group('intval') is not None
|
||||
and isinstance(actual_value, compat_str)):
|
||||
if m.group('op') not in ('=', '!='):
|
||||
raise ValueError(
|
||||
'Operator %s does not support string values!' % m.group('op'))
|
||||
comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
|
||||
quote = m.group('quote')
|
||||
if quote is not None:
|
||||
comparison_value = comparison_value.replace(r'\%s' % quote, quote)
|
||||
else:
|
||||
if m.group('op') in STRING_OPERATORS:
|
||||
raise ValueError('Operator %s only supports string values!' % m.group('op'))
|
||||
try:
|
||||
comparison_value = int(m.group('intval'))
|
||||
except ValueError:
|
||||
@@ -4736,7 +4742,8 @@ def match_str(filter_str, dct):
|
||||
""" Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
|
||||
|
||||
return all(
|
||||
_match_one(filter_part, dct) for filter_part in filter_str.split('&'))
|
||||
_match_one(filter_part.replace(r'\&', '&'), dct)
|
||||
for filter_part in re.split(r'(?<!\\)&', filter_str))
|
||||
|
||||
|
||||
def match_filter_func(filter_str):
|
||||
@@ -6149,8 +6156,11 @@ def to_high_limit_path(path):
|
||||
return path
|
||||
|
||||
|
||||
def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=None):
|
||||
val = obj.get(field, default)
|
||||
def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
|
||||
if field is None:
|
||||
val = obj if obj is not None else default
|
||||
else:
|
||||
val = obj.get(field, default)
|
||||
if func and val not in ignore:
|
||||
val = func(val)
|
||||
return template % val if val not in ignore else default
|
||||
@@ -6251,6 +6261,8 @@ def traverse_obj(
|
||||
|
||||
def _traverse_obj(obj, path, _current_depth=0):
|
||||
nonlocal depth
|
||||
if obj is None:
|
||||
return None
|
||||
path = tuple(variadic(path))
|
||||
for i, key in enumerate(path):
|
||||
if isinstance(key, (list, tuple)):
|
||||
@@ -6263,7 +6275,7 @@ def traverse_obj(
|
||||
_current_depth += 1
|
||||
depth = max(depth, _current_depth)
|
||||
return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
|
||||
elif isinstance(obj, dict):
|
||||
elif isinstance(obj, dict) and not (is_user_input and key == ':'):
|
||||
obj = (obj.get(key) if casesense or (key in obj)
|
||||
else next((v for k, v in obj.items() if _lower(k) == key), None))
|
||||
else:
|
||||
@@ -6271,7 +6283,7 @@ def traverse_obj(
|
||||
key = (int_or_none(key) if ':' not in key
|
||||
else slice(*map(int_or_none, key.split(':'))))
|
||||
if key == slice(None):
|
||||
return _traverse_obj(obj, (..., *path[i + 1:]))
|
||||
return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
|
||||
if not isinstance(key, (int, slice)):
|
||||
return None
|
||||
if not isinstance(obj, (list, tuple, LazyList)):
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2021.07.24'
|
||||
__version__ = '2021.08.02'
|
||||
|
||||
@@ -331,6 +331,26 @@ class CueBlock(Block):
|
||||
'settings': self.settings,
|
||||
}
|
||||
|
||||
def __eq__(self, other):
    """Two blocks are equal when their ``as_json`` values are equal."""
    mine, theirs = self.as_json, other.as_json
    return mine == theirs
|
||||
|
||||
@classmethod
def from_json(cls, json):
    """Build an instance from a mapping with the keys 'id', 'start', 'end',
    'text' and 'settings'; a missing key raises KeyError.
    """
    field_names = ('id', 'start', 'end', 'text', 'settings')
    return cls(**{name: json[name] for name in field_names})
|
||||
|
||||
def hinges(self, other):
    """Return True when ``other`` has the same text and settings as this
    block and starts exactly where this block ends (both time ranges being
    non-inverted).
    """
    if self.text != other.text or self.settings != other.settings:
        return False
    joint = self.end
    return self.start <= joint and joint == other.start and other.start <= other.end
|
||||
|
||||
|
||||
def parse_fragment(frag_content):
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user