Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2026-01-10 17:01:17 +00:00)

Compare commits: 2022.01.21...2022.03.08 (201 commits)
| SHA1 |
|---|
| 5a93dc1b85 |
| 535eb16a44 |
| 9461cb586a |
| a405b38f20 |
| 08d30158ec |
| c89bec262c |
| 151f8f1c02 |
| a35155be17 |
| e66662b1e0 |
| 4390d5ec12 |
| 9e0e6adb2d |
| b637c4e22e |
| fb6e3f4389 |
| 409cdd1ec9 |
| 992f9a730b |
| 497d2fab6c |
| 2807d1709b |
| b46ccbc6d4 |
| 1ed7953a74 |
| d49669acad |
| bed30106f5 |
| 27231526ae |
| 50e93e03a7 |
| 72e995f122 |
| 8b7539d27c |
| e48b3875ec |
| 2a938746f3 |
| 933dbf5a55 |
| a10aa588b0 |
| be8cd3cb1d |
| 319b6059d2 |
| 4c3f8c3fb6 |
| 7265a2190c |
| 3a4bb9f751 |
| b90dbe6c19 |
| 97bef011ee |
| ecca4519b7 |
| 761fba6d22 |
| 5bcccbfec3 |
| ded9f32667 |
| 45806d44a7 |
| 747c0bd127 |
| acea8d7cfb |
| f1d130902b |
| c2ae48dbd5 |
| a5c0c20252 |
| f494ddada8 |
| 02fc6feb6e |
| 7eaf7f9aba |
| 334b1c4800 |
| 7c219ea601 |
| 93c8410d33 |
| 195c22840c |
| f0734e1190 |
| 15dfb3929c |
| 3e9b66d761 |
| a539f06570 |
| b440e1bb22 |
| 03f830040a |
| 09b49e1f68 |
| 1108613f02 |
| a30a6ed3e4 |
| 65d151d58f |
| 72073451be |
| 77cc7c6e60 |
| 971c4847d7 |
| 7a34b5d628 |
| 4d4f9a029f |
| f099df1463 |
| 3f4faff748 |
| be8d623455 |
| a7d4acc018 |
| febff4c119 |
| ed66a17ef0 |
| 5625e6073f |
| 0ad92dfb18 |
| 60f3e99592 |
| 8d93e69d67 |
| 3aa915400d |
| dcd55f766d |
| 2e4cacd038 |
| c15c316b21 |
| 549cb2a836 |
| c571b3a6ab |
| 5b804e3906 |
| 6bb608d055 |
| ae419aa94f |
| ac184ab742 |
| 5c10453827 |
| ffa89477ea |
| db74de8c54 |
| edecb5f81f |
| 85a0ad0117 |
| 07ea0014ae |
| e1f7f235bd |
| fc259cc249 |
| 9a5b012575 |
| df635a09a4 |
| 812283199a |
| 5c6dfc1f79 |
| c2a8547fdc |
| 0a19532ead |
| 2d41e2eceb |
| 81c5f44c0f |
| 1f7db8533a |
| e8969bda94 |
| c82f051dbb |
| 49895f062e |
| 60f393e48b |
| 88afe05695 |
| 57ebfca39b |
| b1cb0525ac |
| da42679b87 |
| 2944835080 |
| a3eb987e0e |
| 7bc33ad0e9 |
| 2068a60318 |
| 1ce9a3cb49 |
| d49f8db39f |
| ab6df717d1 |
| 0c8d9e5fec |
| 3f047fc406 |
| 82b5176783 |
| 17b183886f |
| cd170e8184 |
| 297e9952b6 |
| dca4f46274 |
| 5dee3ad037 |
| 079a7cfc71 |
| 3856407a86 |
| db2e129ca0 |
| 1209b6ca5b |
| a3125791c7 |
| f1657a98cb |
| b761428226 |
| c1653e9efb |
| 84bbc54599 |
| 1e5d87beee |
| 22219f2d1f |
| 5a13fdd225 |
| af5c1c553e |
| 3cea9ec2eb |
| 28469edd7d |
| d5a398988b |
| 455a15e2dc |
| 460a1c08b9 |
| 4918522735 |
| 65662dffb1 |
| 5e51f4a8ad |
| 54bb39065c |
| c5332d7fbb |
| 35cd4c4d88 |
| 67fb99f193 |
| 85553414ae |
| d16df59db5 |
| 63c3ee4f63 |
| 182bda88e8 |
| 16aa9ea41d |
| d6bc443bde |
| 046cab3915 |
| 7df07a3b55 |
| 2d49720f89 |
| 48416bc4a8 |
| 6a0546e313 |
| dbcea0585f |
| f7d4854131 |
| 403be2eefb |
| 63bac931c2 |
| 7c74a01584 |
| 1d3586d0d5 |
| c533c89ce1 |
| b8b3f4562a |
| 1c6f480160 |
| f8580bf02f |
| 19afd9ea51 |
| b72270d27e |
| 706dfe441b |
| c4da5ff971 |
| e26f9cc1e5 |
| fa8fd95118 |
| 05b23b4156 |
| 8f028b5f40 |
| 013322a95e |
| fb62afd6f0 |
| 50600e833d |
| fc08bdd6ab |
| 2568d41f70 |
| 88f23a18e0 |
| bb66c24797 |
| 2edb38e8ca |
| af6793f804 |
| b695e3f9bd |
| 6a5a30f9e2 |
| d37707bda4 |
| f40ee5e9a0 |
| 1f13021eca |
| e612f66c7c |
| 87e8e8a7d0 |
| e600a5c908 |
| 50ce204cc2 |
| 144a3588b4 |
.github/ISSUE_TEMPLATE/1_broken_site.yml (vendored, 8 changes)

@@ -1,4 +1,4 @@
- name: Broken site support
+ name: Broken site
description: Report broken or misfunctioning site
labels: [triage, site-bug]
body:
@@ -11,7 +11,7 @@ body:
options:
- label: I'm reporting a broken site
required: true
- - label: I've verified that I'm running yt-dlp version **2021.12.27**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
+ - label: I've verified that I'm running yt-dlp version **2022.03.08**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
required: true
- label: I've checked that all provided URLs are alive and playable in a browser
required: true
@@ -51,12 +51,12 @@ body:
[debug] Portable config file: yt-dlp.conf
[debug] Portable config: ['-i']
[debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252
- [debug] yt-dlp version 2021.12.27 (exe)
+ [debug] yt-dlp version 2022.03.08 (exe)
[debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0
[debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1
[debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets
[debug] Proxy map: {}
- yt-dlp is up to date (2021.12.27)
+ yt-dlp is up to date (2022.03.08)
<more lines>
render: shell
validations:
@@ -11,7 +11,7 @@ body:
options:
- label: I'm reporting a new site support request
required: true
- - label: I've verified that I'm running yt-dlp version **2021.12.27**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
+ - label: I've verified that I'm running yt-dlp version **2022.03.08**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
required: true
- label: I've checked that all provided URLs are alive and playable in a browser
required: true
@@ -62,12 +62,12 @@ body:
[debug] Portable config file: yt-dlp.conf
[debug] Portable config: ['-i']
[debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252
- [debug] yt-dlp version 2021.12.27 (exe)
+ [debug] yt-dlp version 2022.03.08 (exe)
[debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0
[debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1
[debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets
[debug] Proxy map: {}
- yt-dlp is up to date (2021.12.27)
+ yt-dlp is up to date (2022.03.08)
<more lines>
render: shell
validations:
@@ -11,7 +11,7 @@ body:
options:
- label: I'm reporting a site feature request
required: true
- - label: I've verified that I'm running yt-dlp version **2021.12.27**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
+ - label: I've verified that I'm running yt-dlp version **2022.03.08**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
required: true
- label: I've checked that all provided URLs are alive and playable in a browser
required: true
@@ -32,7 +32,7 @@ body:
label: Example URLs
description: |
Example URLs that can be used to demonstrate the requested feature
- value: |
+ placeholder: |
https://www.youtube.com/watch?v=BaW_jenozKc
validations:
required: true
@@ -60,12 +60,12 @@ body:
[debug] Portable config file: yt-dlp.conf
[debug] Portable config: ['-i']
[debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252
- [debug] yt-dlp version 2021.12.27 (exe)
+ [debug] yt-dlp version 2022.03.08 (exe)
[debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0
[debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1
[debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets
[debug] Proxy map: {}
- yt-dlp is up to date (2021.12.27)
+ yt-dlp is up to date (2022.03.08)
<more lines>
render: shell
validations:
.github/ISSUE_TEMPLATE/4_bug_report.yml (vendored, 6 changes)

@@ -11,7 +11,7 @@ body:
options:
- label: I'm reporting a bug unrelated to a specific site
required: true
- - label: I've verified that I'm running yt-dlp version **2021.12.27**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
+ - label: I've verified that I'm running yt-dlp version **2022.03.08**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
required: true
- label: I've checked that all provided URLs are alive and playable in a browser
required: true
@@ -45,12 +45,12 @@ body:
[debug] Portable config file: yt-dlp.conf
[debug] Portable config: ['-i']
[debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252
- [debug] yt-dlp version 2021.12.27 (exe)
+ [debug] yt-dlp version 2022.03.08 (exe)
[debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0
[debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1
[debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets
[debug] Proxy map: {}
- yt-dlp is up to date (2021.12.27)
+ yt-dlp is up to date (2022.03.08)
<more lines>
render: shell
validations:
.github/ISSUE_TEMPLATE/5_feature_request.yml (vendored, 4 changes)

@@ -11,7 +11,9 @@ body:
options:
- label: I'm reporting a feature request
required: true
- - label: I've verified that I'm running yt-dlp version **2021.12.27**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
+ - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
+ required: true
+ - label: I've verified that I'm running yt-dlp version **2022.03.08**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
required: true
- label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. DO NOT post duplicates
required: true
.github/ISSUE_TEMPLATE/6_question.yml (vendored, 3 changes)

@@ -25,7 +25,8 @@ body:
Ask your question in an arbitrary form.
Please make sure it's worded well enough to be understood, see [is-the-description-of-the-issue-itself-sufficient](https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient).
Provide any additional information and as much context and examples as possible.
- If your question contains "isn't working" or "can you add", this is most likely the wrong template
+ If your question contains "isn't working" or "can you add", this is most likely the wrong template.
+ If you are in doubt if this is the right template, use another template!
placeholder: WRITE QUESTION HERE
validations:
required: true
.github/ISSUE_TEMPLATE/config.yml (vendored, 3 changes)

@@ -3,3 +3,6 @@ contact_links:
- name: Get help from the community on Discord
url: https://discord.gg/H5MNcFW63r
about: Join the yt-dlp Discord for community-powered support!
+ - name: Matrix Bridge to the Discord server
+ url: https://matrix.to/#/#yt-dlp:matrix.org
+ about: For those who do not want to use Discord
@@ -1,4 +1,4 @@
- name: Broken site support
+ name: Broken site
description: Report broken or misfunctioning site
labels: [triage, site-bug]
body:
@@ -11,6 +11,8 @@ body:
options:
- label: I'm reporting a feature request
required: true
+ - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
+ required: true
- label: I've verified that I'm running yt-dlp version **%(version)s**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update))
required: true
- label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. DO NOT post duplicates
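The `_tmpl` templates above keep a printf-style `%(version)s` placeholder that a devscript expands into the concrete release version when generating the real issue templates. A minimal sketch of that expansion step (the function name and file paths here are illustrative assumptions, not yt-dlp's actual devscript):

```python
# Hypothetical sketch: expand %(version)s in an issue-template file.
# Any literal "%" elsewhere in the template would need escaping as "%%".
def expand_template(src, dst, version):
    with open(src, encoding='utf-8') as f:
        template = f.read()
    # printf-style mapping substitution matches the %(version)s placeholder
    with open(dst, 'w', encoding='utf-8') as f:
        f.write(template % {'version': version})

expand_template(
    '.github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml',
    '.github/ISSUE_TEMPLATE/5_feature_request.yml',
    '2022.03.08')
```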
.github/ISSUE_TEMPLATE_tmpl/6_question.yml (vendored, 3 changes)

@@ -25,7 +25,8 @@ body:
Ask your question in an arbitrary form.
Please make sure it's worded well enough to be understood, see [is-the-description-of-the-issue-itself-sufficient](https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient).
Provide any additional information and as much context and examples as possible.
- If your question contains "isn't working" or "can you add", this is most likely the wrong template
+ If your question contains "isn't working" or "can you add", this is most likely the wrong template.
+ If you are in doubt if this is the right template, use another template!
placeholder: WRITE QUESTION HERE
validations:
required: true
.github/workflows/build.yml (vendored, 7 changes)

@@ -161,11 +161,10 @@ jobs:
steps:
- uses: actions/checkout@v2
# In order to create a universal2 application, the version of python3 in /usr/bin has to be used
- # Pyinstaller is pinned to 4.5.1 because the builds are failing in 4.6, 4.7
- name: Install Requirements
run: |
brew install coreutils
- /usr/bin/python3 -m pip install -U --user pip Pyinstaller==4.5.1 -r requirements.txt
+ /usr/bin/python3 -m pip install -U --user pip Pyinstaller==4.10 -r requirements.txt
- name: Bump version
id: bump_version
run: /usr/bin/python3 devscripts/update-version.py
@@ -234,7 +233,7 @@
# Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
run: |
python -m pip install --upgrade pip setuptools wheel py2exe
- pip install "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-4.5.1-py3-none-any.whl" -r requirements.txt
+ pip install "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-4.10-py3-none-any.whl" -r requirements.txt
- name: Bump version
id: bump_version
env:
@@ -321,7 +320,7 @@
- name: Install Requirements
run: |
python -m pip install --upgrade pip setuptools wheel
- pip install "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-4.5.1-py3-none-any.whl" -r requirements.txt
+ pip install "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-4.10-py3-none-any.whl" -r requirements.txt
- name: Bump version
id: bump_version
env:
.gitignore (vendored, 4 changes)

@@ -24,6 +24,7 @@ cookies

*.3gp
*.ape
+ *.ass
*.avi
*.desktop
*.flac
@@ -92,7 +93,7 @@ README.txt
*.tar.gz
*.zsh
*.spec
- test/testdata/player-*.js
+ test/testdata/sigs/player-*.js

# Binary
/youtube-dl
@@ -106,6 +107,7 @@ yt-dlp.zip
*.iml
.vscode
*.sublime-*
+ *.code-workspace

# Lazy extractors
*/extractor/lazy_extractors.py
@@ -11,6 +11,7 @@
- [Is anyone going to need the feature?](#is-anyone-going-to-need-the-feature)
- [Is your question about yt-dlp?](#is-your-question-about-yt-dlp)
- [Are you willing to share account details if needed?](#are-you-willing-to-share-account-details-if-needed)
+ - [Is the website primarily used for piracy](#is-the-website-primarily-used-for-piracy)
- [DEVELOPER INSTRUCTIONS](#developer-instructions)
- [Adding new feature or making overarching changes](#adding-new-feature-or-making-overarching-changes)
- [Adding support for a new site](#adding-support-for-a-new-site)
@@ -24,6 +25,7 @@
- [Collapse fallbacks](#collapse-fallbacks)
- [Trailing parentheses](#trailing-parentheses)
- [Use convenience conversion and parsing functions](#use-convenience-conversion-and-parsing-functions)
+ - [My pull request is labeled pending-fixes](#my-pull-request-is-labeled-pending-fixes)
- [EMBEDDING YT-DLP](README.md#embedding-yt-dlp)

@@ -113,7 +115,7 @@ If the issue is with `youtube-dl` (the upstream fork of yt-dlp) and not with yt-

### Are you willing to share account details if needed?

- The maintainers and potential contributors of the project often do not have an account for the website you are asking support for. So any developer interested in solving your issue may ask you for account details. It is your personal discression whether you are willing to share the account in order for the developer to try and solve your issue. However, if you are unwilling or unable to provide details, they obviously cannot work on the issue and it cannot be solved unless some developer who both has an account and is willing/able to contribute decides to solve it.
+ The maintainers and potential contributors of the project often do not have an account for the website you are asking support for. So any developer interested in solving your issue may ask you for account details. It is your personal discretion whether you are willing to share the account in order for the developer to try and solve your issue. However, if you are unwilling or unable to provide details, they obviously cannot work on the issue and it cannot be solved unless some developer who both has an account and is willing/able to contribute decides to solve it.

By sharing an account with anyone, you agree to bear all risks associated with it. The maintainers and yt-dlp can't be held responsible for any misuse of the credentials.
@@ -123,6 +125,10 @@ While these steps won't necessarily ensure that no misuse of the account takes p
- Change the password before sharing the account to something random (use [this](https://passwordsgenerator.net/) if you don't have a random password generator).
- Change the password after receiving the account back.

+ ### Is the website primarily used for piracy?
+
+ We follow [youtube-dl's policy](https://github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free) to not support services that is primarily used for infringing copyright. Additionally, it has been decided to not to support porn sites that specialize in deep fake. We also cannot support any service that serves only [DRM protected content](https://en.wikipedia.org/wiki/Digital_rights_management).
+
@@ -210,7 +216,7 @@ After you have ensured this site is distributing its content legally, you can fo
}
```
1. Add an import in [`yt_dlp/extractor/extractors.py`](yt_dlp/extractor/extractors.py).
- 1. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, the tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in. You can also run all the tests in one go with `TestDownload.test_YourExtractor_all`
+ 1. Run `python test/test_download.py TestDownload.test_YourExtractor` (note that `YourExtractor` doesn't end with `IE`). This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, the tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in. You can also run all the tests in one go with `TestDownload.test_YourExtractor_all`
1. Make sure you have atleast one test for your extractor. Even if all videos covered by the extractor are expected to be inaccessible for automated testing, tests should still be added with a `skip` parameter indicating why the particular test is disabled from running.
1. Have a look at [`yt_dlp/extractor/common.py`](yt_dlp/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](yt_dlp/extractor/common.py#L91-L426). Add tests and code for as many as you want.
1. Make sure your code follows [yt-dlp coding conventions](#yt-dlp-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
@@ -252,7 +258,11 @@ For extraction to work yt-dlp relies on metadata your extractor extracts and pro
- `title` (media title)
- `url` (media download URL) or `formats`

- The aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken. While, in fact, only `id` is technically mandatory, due to compatibility reasons, yt-dlp also treats `title` as mandatory. The extractor is allowed to return the info dict without url or formats in some special cases if it allows the user to extract usefull information with `--ignore-no-formats-error` - Eg: when the video is a live stream that has not started yet.
+ The aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken. While all extractors must return a `title`, they must also allow it's extraction to be non-fatal.
+
+ For pornographic sites, appropriate `age_limit` must also be returned.
+
+ The extractor is allowed to return the info dict without url or formats in some special cases if it allows the user to extract usefull information with `--ignore-no-formats-error` - Eg: when the video is a live stream that has not started yet.

[Any field](yt_dlp/extractor/common.py#219-L426) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
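As a companion to the mandatory-field rules above, a bare-bones extractor returning just the critical metafields might look like the following sketch (it follows the `yourextractor` skeleton referenced earlier in this file; the site and URL pattern are placeholders, not a real extractor):

```python
from yt_dlp.extractor.common import InfoExtractor


class YourExtractorIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        return {
            'id': video_id,  # the only technically mandatory field
            # title must be extracted non-fatally, per the rule above
            'title': self._og_search_title(webpage, default=None),
            # either a direct 'url' or a 'formats' list must be provided
            'url': self._og_search_video_url(webpage),
        }
```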
@@ -654,6 +664,10 @@ duration = float_or_none(video.get('durationMs'), scale=1000)
view_count = int_or_none(video.get('views'))
```

+ # My pull request is labeled pending-fixes
+
+ The `pending-fixes` label is added when there are changes requested to a PR. When the necessary changes are made, the label should be removed. However, despite our best efforts, it may sometimes happen that the maintainer did not see the changes or forgot to remove the label. If your PR is still marked as `pending-fixes` a few days after all requested changes have been made, feel free to ping the maintainer who labeled your issue and ask them to re-review and remove the label.
+
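The convenience functions shown in this hunk guard against missing or malformed metadata. Roughly, they behave like this simplified sketch (the real implementations in `yt_dlp/utils.py` take additional arguments such as `invscale` and handle more edge cases):

```python
# Simplified semantics of the convenience converters (illustrative only)
def int_or_none(v, default=None):
    try:
        return int(v)
    except (TypeError, ValueError):
        return default


def float_or_none(v, scale=1, default=None):
    try:
        return float(v) / scale  # e.g. durationMs with scale=1000 -> seconds
    except (TypeError, ValueError):
        return default


print(float_or_none('123000', scale=1000))  # 123.0
print(int_or_none(None))                    # None instead of raising TypeError
```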
CONTRIBUTORS (40 changes)

@@ -146,7 +146,7 @@ chio0hai
cntrl-s
Deer-Spangle
DEvmIb
- Grabien
+ Grabien/MaximVol
j54vc1bk
mpeter50
mrpapersonic
@@ -160,7 +160,7 @@ PilzAdam
zmousm
iw0nderhow
unit193
- TwoThousandHedgehogs
+ TwoThousandHedgehogs/KathrynElrod
Jertzukka
cypheron
Hyeeji
@@ -178,3 +178,39 @@ jaller94
r5d
julien-hadleyjack
git-anony-mouse
+ mdawar
+ trassshhub
+ foghawk
+ k3ns1n
+ teridon
+ mozlima
+ timendum
+ ischmidt20
+ CreaValix
+ sian1468
+ arkamar
+ hyano
+ KiberInfinity
+ tejing1
+ Bricio
+ lazypete365
+ Aniruddh-J
+ blackgear
+ CplPwnies
+ cyberfox1691
+ FestplattenSchnitzel
+ hatienl0i261299
+ iphoting
+ jakeogh
+ lukasfink1
+ lyz-code
+ marieell
+ mdpauley
+ Mipsters
+ mxmehl
+ ofkz
+ P-reducible
+ pycabbage
+ regarten
+ Ronnnny
+ schn0sch
Changelog.md (315 changes)

@@ -11,6 +11,321 @@
-->


+ ### 2022.03.08
+
+ * Merge youtube-dl: Upto [commit/6508688](https://github.com/ytdl-org/youtube-dl/commit/6508688e88c83bb811653083db9351702cd39a6a) (except NDR)
+ * Add regex operator and quoting to format filters by [lukasfink1](https://github.com/lukasfink1)
+ * Add brotli content-encoding support by [coletdjnz](https://github.com/coletdjnz)
+ * Add pre-processor stage `after_filter`
+ * Better error message when no `--live-from-start` format
+ * Create necessary directories for `--print-to-file`
+ * Fill more fields for playlists by [Lesmiscore](https://github.com/Lesmiscore)
+ * Fix `-all` for `--sub-langs`
+ * Fix doubling of `video_id` in `ExtractorError`
+ * Fix for when stdout/stderr encoding is `None`
+ * Handle negative duration from extractor
+ * Implement `--add-header` without modifying `std_headers`
+ * Obey `--abort-on-error` for "ffmpeg not installed"
+ * Set `webpage_url_...` from `webpage_url` and not input URL
+ * Tolerate failure to `--write-link` due to unknown URL
+ * [aria2c] Add `--http-accept-gzip=true`
+ * [build] Update pyinstaller to 4.10 by [shirt-dev](https://github.com/shirt-dev)
+ * [cookies] Update MacOS12 `Cookies.binarycookies` location by [mdpauley](https://github.com/mdpauley)
+ * [devscripts] Improve `prepare_manpage`
+ * [downloader] Do not use aria2c for non-native `m3u8`
+ * [downloader] Obey `--file-access-retries` when deleting/renaming by [ehoogeveen-medweb](https://github.com/ehoogeveen-medweb)
+ * [extractor] Allow `http_headers` to be specified for `thumbnails`
+ * [extractor] Extract subtitles from manifests for vimeo, globo, kaltura, svt by [fstirlitz](https://github.com/fstirlitz)
+ * [extractor] Fix for manifests without period duration by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan)
+ * [extractor] Support `--mark-watched` without `_NETRC_MACHINE` by [coletdjnz](https://github.com/coletdjnz)
+ * [FFmpegConcat] Abort on `--simulate`
+ * [FormatSort] Consider `acodec`=`ogg` as `vorbis`
+ * [fragment] Fix bugs around resuming with Range by [Lesmiscore](https://github.com/Lesmiscore)
+ * [fragment] Improve `--live-from-start` for YouTube livestreams by [Lesmiscore](https://github.com/Lesmiscore)
+ * [generic] Pass referer to extracted formats
+ * [generic] Set rss `guid` as video id by [Bricio](https://github.com/Bricio)
+ * [options] Better ambiguous option resolution
+ * [options] Rename `--clean-infojson` to `--clean-info-json`
+ * [SponsorBlock] Fixes for highlight and "full video labels" by [nihil-admirari](https://github.com/nihil-admirari)
+ * [Sponsorblock] minor fixes by [nihil-admirari](https://github.com/nihil-admirari)
+ * [utils] Better traceback for `ExtractorError`
+ * [utils] Fix file locking for AOSP by [jakeogh](https://github.com/jakeogh)
+ * [utils] Improve file locking
+ * [utils] OnDemandPagedList: Do not download pages after error
+ * [utils] render_table: Fix character calculation for removing extra gap by [Lesmiscore](https://github.com/Lesmiscore)
+ * [utils] Use `locked_file` for `sanitize_open` by [jakeogh](https://github.com/jakeogh)
+ * [utils] Validate `DateRange` input
+ * [utils] WebSockets wrapper for non-async functions by [Lesmiscore](https://github.com/Lesmiscore)
+ * [cleanup] Don't pass protocol to `_extract_m3u8_formats` for live videos
+ * [cleanup] Remove extractors for some dead websites by [marieell](https://github.com/marieell)
+ * [cleanup, docs] Misc cleanup
+ * [AbemaTV] Add extractors by [Lesmiscore](https://github.com/Lesmiscore)
+ * [adobepass] Add Suddenlink MSO by [CplPwnies](https://github.com/CplPwnies)
+ * [ant1newsgr] Add extractor by [zmousm](https://github.com/zmousm)
+ * [bigo] Add extractor by [Lesmiscore](https://github.com/Lesmiscore)
+ * [Caltrans] Add extractor by [Bricio](https://github.com/Bricio)
+ * [daystar] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+ * [fc2:live] Add extractor by [Lesmiscore](https://github.com/Lesmiscore)
+ * [fptplay] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+ * [murrtube] Add extractor by [cyberfox1691](https://github.com/cyberfox1691)
+ * [nfb] Add extractor by [ofkz](https://github.com/ofkz)
+ * [niconico] Add playlist extractors and refactor by [Lesmiscore](https://github.com/Lesmiscore)
+ * [peekvids] Add extractor by [schn0sch](https://github.com/schn0sch)
+ * [piapro] Add extractor by [pycabbage](https://github.com/pycabbage), [Lesmiscore](https://github.com/Lesmiscore)
+ * [rokfin] Add extractor by [P-reducible](https://github.com/P-reducible), [pukkandan](https://github.com/pukkandan)
+ * [rokfin] Add stack and channel extractors by [P-reducible](https://github.com/P-reducible), [pukkandan](https://github.com/pukkandan)
+ * [ruv.is] Add extractor by [iw0nderhow](https://github.com/iw0nderhow)
+ * [telegram] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+ * [VideocampusSachsen] Add extractors by [FestplattenSchnitzel](https://github.com/FestplattenSchnitzel)
+ * [xinpianchang] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+ * [abc] Support 1080p by [Ronnnny](https://github.com/Ronnnny)
+ * [afreecatv] Support password-protected livestreams by [wlritchi](https://github.com/wlritchi)
+ * [ard] Fix valid URL
+ * [ATVAt] Detect geo-restriction by [marieell](https://github.com/marieell)
+ * [bandcamp] Detect acodec
+ * [bandcamp] Fix user URLs by [lyz-code](https://github.com/lyz-code)
+ * [bbc] Fix extraction of news articles by [ajj8](https://github.com/ajj8)
+ * [beeg] Fix extractor by [Bricio](https://github.com/Bricio)
+ * [bigo] Fix extractor to not to use `form_params`
+ * [Bilibili] Pass referer for all formats by [blackgear](https://github.com/blackgear)
+ * [Biqle] Fix extractor by [Bricio](https://github.com/Bricio)
+ * [ccma] Fix timestamp parsing by [nyuszika7h](https://github.com/nyuszika7h)
+ * [crunchyroll] Better error reporting on login failure by [tejing1](https://github.com/tejing1)
+ * [cspan] Support of C-Span congress videos by [Grabien](https://github.com/Grabien)
+ * [dropbox] fix regex by [zenerdi0de](https://github.com/zenerdi0de)
+ * [fc2] Fix extraction by [Lesmiscore](https://github.com/Lesmiscore)
+ * [fujitv] Extract resolution for free sources by [YuenSzeHong](https://github.com/YuenSzeHong)
+ * [Gettr] Add `GettrStreamingIE` by [i6t](https://github.com/i6t)
+ * [Gettr] Fix formats order by [i6t](https://github.com/i6t)
+ * [Gettr] Improve extractor by [i6t](https://github.com/i6t)
+ * [globo] Expand valid URL by [Bricio](https://github.com/Bricio)
+ * [lbry] Fix `--ignore-no-formats-error`
+ * [manyvids] Extract `uploader` by [regarten](https://github.com/regarten)
+ * [mildom] Fix linter
+ * [mildom] Rework extractors by [Lesmiscore](https://github.com/Lesmiscore)
+ * [mirrativ] Cleanup extractor code by [Lesmiscore](https://github.com/Lesmiscore)
+ * [nhk] Add support for NHK for School by [Lesmiscore](https://github.com/Lesmiscore)
+ * [niconico:tag] Add support for searching tags
+ * [nrk] Add fallback API
+ * [peekvids] Use JSON-LD by [schn0sch](https://github.com/schn0sch)
+ * [peertube] Add media.fsfe.org by [mxmehl](https://github.com/mxmehl)
+ * [rtvs] Fix extractor by [Bricio](https://github.com/Bricio)
+ * [spiegel] Fix `_VALID_URL`
+ * [ThumbnailsConvertor] Support `webp`
+ * [tiktok] Fix `vm.tiktok`/`vt.tiktok` URLs
+ * [tubitv] Fix/improve TV series extraction by [bbepis](https://github.com/bbepis)
+ * [tumblr] Fix extractor by [foghawk](https://github.com/foghawk)
+ * [twitcasting] Add fallback for finding running live by [Lesmiscore](https://github.com/Lesmiscore)
+ * [TwitCasting] Check for password protection by [Lesmiscore](https://github.com/Lesmiscore)
+ * [twitcasting] Fix extraction by [Lesmiscore](https://github.com/Lesmiscore)
+ * [twitch] Fix field name of `view_count`
+ * [twitter] Fix for private videos by [iphoting](https://github.com/iphoting)
+ * [washingtonpost] Fix extractor by [Bricio](https://github.com/Bricio)
+ * [youtube:tab] Add `approximate_date` extractor-arg
+ * [youtube:tab] Follow redirect to regional channel by [coletdjnz](https://github.com/coletdjnz)
+ * [youtube:tab] Reject webpage data if redirected to home page
+ * [youtube] De-prioritize potentially damaged formats
+ * [youtube] Differentiate descriptive audio by language code
+ * [youtube] Ensure subtitle urls are absolute by [coletdjnz](https://github.com/coletdjnz)
+ * [youtube] Escape possible `$` in `_extract_n_function_name` regex by [Lesmiscore](https://github.com/Lesmiscore)
+ * [youtube] Fix automatic captions
+ * [youtube] Fix n-sig extraction for phone player JS by [MinePlayersPE](https://github.com/MinePlayersPE)
+ * [youtube] Further de-prioritize 3gp format
+ * [youtube] Label original auto-subs
+ * [youtube] Prefer UTC upload date for videos by [coletdjnz](https://github.com/coletdjnz)
+ * [zaq1] Remove dead extractor by [marieell](https://github.com/marieell)
+ * [zee5] Support web-series by [Aniruddh-J](https://github.com/Aniruddh-J)
+ * [zingmp3] Fix extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+ * [zoom] Add support for screen cast by [Mipsters](https://github.com/Mipsters)
+
+
+ ### 2022.02.04
+
+ * [youtube:search] Fix extractor by [coletdjnz](https://github.com/coletdjnz)
+ * [youtube:search] Add tests
+ * [twitcasting] Enforce UTF-8 for POST payload by [Lesmiscore](https://github.com/Lesmiscore)
+ * [mediaset] Fix extractor by [nixxo](https://github.com/nixxo)
+ * [websocket] Make syntax error in `websockets` module non-fatal
+
+ ### 2022.02.03
+
+ * Merge youtube-dl: Upto [commit/78ce962](https://github.com/ytdl-org/youtube-dl/commit/78ce962f4fe020994c216dd2671546fbe58a5c67)
+ * Add option `--print-to-file`
+ * Make nested --config-locations relative to parent file
+ * Ensure `_type` is present in `info.json`
+ * Fix `--compat-options list-formats`
+ * Fix/improve `InAdvancePagedList`
+ * [downloader/ffmpeg] Handle unknown formats better
+ * [outtmpl] Handle `-o ""` better
+ * [outtmpl] Handle hard-coded file extension better
+ * [extractor] Add convinience function `_yes_playlist`
+ * [extractor] Allow non-fatal `title` extraction
+ * [extractor] Extract video inside `Article` json_ld
+ * [generic] Allow further processing of json_ld URL
+ * [cookies] Fix keyring selection for unsupported desktops
+ * [utils] Strip double spaces in `clean_html` by [dirkf](https://github.com/dirkf)
+ * [aes] Add `unpad_pkcs7`
+ * [test] Fix `test_youtube_playlist_noplaylist`
+ * [docs,cleanup] Misc cleanup
+ * [dplay] Add extractors for site changes by [Sipherdrakon](https://github.com/Sipherdrakon)
+ * [ertgr] Add extractors by [zmousm](https://github.com/zmousm), [dirkf](https://github.com/dirkf)
+ * [Musicdex] Add extractors by [Ashish0804](https://github.com/Ashish0804)
+ * [YandexVideoPreview] Add extractor by [KiberInfinity](https://github.com/KiberInfinity)
+ * [youtube] Add extractor `YoutubeMusicSearchURLIE`
+ * [archive.org] Ignore unnecessary files
+ * [Bilibili] Add 8k support by [u-spec-png](https://github.com/u-spec-png)
+ * [bilibili] Fix extractor, make anthology title non-fatal
+ * [CAM4] Add thumbnail extraction by [alerikaisattera](https://github.com/alerikaisattera)
+ * [cctv] De-prioritize sample format
+ * [crunchyroll:beta] Add cookies support by [tejing1](https://github.com/tejing1)
+ * [crunchyroll] Fix login by [tejing1](https://github.com/tejing1)
+ * [doodstream] Fix extractor
+ * [fc2] Fix extraction by [Lesmiscore](https://github.com/Lesmiscore)
+ * [FFmpegConcat] Abort on --skip-download and download errors
+ * [Fujitv] Extract metadata and support premium by [YuenSzeHong](https://github.com/YuenSzeHong)
+ * [globo] Fix extractor by [Bricio](https://github.com/Bricio)
+ * [glomex] Simplify embed detection
+ * [GoogleSearch] Fix extractor
+ * [Instagram] Fix extraction when logged in by [MinePlayersPE](https://github.com/MinePlayersPE)
+ * [iq.com] Add VIP support by [MinePlayersPE](https://github.com/MinePlayersPE)
+ * [mildom] Fix extractor by [lazypete365](https://github.com/lazypete365)
+ * [MySpass] Fix video url processing by [trassshhub](https://github.com/trassshhub)
+ * [Odnoklassniki] Improve embedded players extraction by [KiberInfinity](https://github.com/KiberInfinity)
+ * [orf:tvthek] Lazy playlist extraction and obey --no-playlist
+ * [Pladform] Fix redirection to external player by [KiberInfinity](https://github.com/KiberInfinity)
+ * [ThisOldHouse] Improve Premium URL check by [Ashish0804](https://github.com/Ashish0804)
+ * [TikTok] Iterate through app versions by [MinePlayersPE](https://github.com/MinePlayersPE)
+ * [tumblr] Fix 403 errors and handle vimeo embeds by [foghawk](https://github.com/foghawk)
+ * [viki] Fix "Bad request" for manifest by [nyuszika7h](https://github.com/nyuszika7h)
+ * [Vimm] add recording extractor by [alerikaisattera](https://github.com/alerikaisattera)
+ * [web.archive:youtube] Add `ytarchive:` prefix and misc cleanup
+ * [youtube:api] Do not use seek when reading HTTPError response by [coletdjnz](https://github.com/coletdjnz)
+ * [youtube] Fix n-sig for player e06dea74
+ * [youtube, cleanup] Misc fixes and cleanup
+
+
+ ### 2022.01.21
+
+ * Add option `--concat-playlist` to **concat videos in a playlist**
+ * Allow **multiple and nested configuration files**
+ * Add more post-processing stages (`after_video`, `playlist`)
+ * Allow `--exec` to be run at any post-processing stage (Deprecates `--exec-before-download`)
+ * Allow `--print` to be run at any post-processing stage
+ * Allow listing formats, thumbnails, subtitles using `--print` by [pukkandan](https://github.com/pukkandan), [Zirro](https://github.com/Zirro)
+ * Add fields `video_autonumber`, `modified_date`, `modified_timestamp`, `playlist_count`, `channel_follower_count`
+ * Add key `requested_downloads` in the root `info_dict`
+ * Write `download_archive` only after all formats are downloaded
+ * [FfmpegMetadata] Allow setting metadata of individual streams using `meta<n>_` prefix
+ * Add option `--legacy-server-connect` by [xtkoba](https://github.com/xtkoba)
+ * Allow escaped `,` in `--extractor-args`
+ * Allow unicode characters in `info.json`
+ * Check for existing thumbnail/subtitle in final directory
+ * Don't treat empty containers as `None` in `sanitize_info`
+ * Fix `-s --ignore-no-formats --force-write-archive`
+ * Fix live title for multiple formats
+ * List playlist thumbnails in `--list-thumbnails`
+ * Raise error if subtitle download fails
+ * [cookies] Fix bug when keyring is unspecified
+ * [ffmpeg] Ignore unknown streams, standardize use of `-map 0`
+ * [outtmpl] Alternate form for `D` and fix suffix's case
+ * [utils] Add `Sec-Fetch-Mode` to `std_headers`
+ * [utils] Fix `format_bytes` output for Bytes by [pukkandan](https://github.com/pukkandan), [mdawar](https://github.com/mdawar)
+ * [utils] Handle `ss:xxx` in `parse_duration`
+ * [utils] Improve parsing for nested HTML elements by [zmousm](https://github.com/zmousm), [pukkandan](https://github.com/pukkandan)
+ * [utils] Use key `None` in `traverse_obj` to return as-is
+ * [extractor] Detect more subtitle codecs in MPD manifests by [fstirlitz](https://github.com/fstirlitz)
+ * [extractor] Extract chapters from JSON-LD by [iw0nderhow](https://github.com/iw0nderhow), [pukkandan](https://github.com/pukkandan)
+ * [extractor] Extract thumbnails from JSON-LD by [nixxo](https://github.com/nixxo)
+ * [extractor] Improve `url_result` and related
+ * [generic] Improve KVS player extraction by [trassshhub](https://github.com/trassshhub)
+ * [build] Reduce dependency on third party workflows
+ * [extractor,cleanup] Use `_search_nextjs_data`, `format_field`
+ * [cleanup] Minor fixes and cleanup
+ * [docs] Improvements
+ * [test] Fix TestVerboseOutput
+ * [afreecatv] Add livestreams extractor by [wlritchi](https://github.com/wlritchi)
+ * [callin] Add extractor by [foghawk](https://github.com/foghawk)
+ * [CrowdBunker] Add extractors by [Ashish0804](https://github.com/Ashish0804)
+ * [daftsex] Add extractors by [k3ns1n](https://github.com/k3ns1n)
+ * [digitalconcerthall] Add extractor by [teridon](https://github.com/teridon)
+ * [Drooble] Add extractor by [u-spec-png](https://github.com/u-spec-png)
+ * [EuropeanTour] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+ * [iq.com] Add extractors by [MinePlayersPE](https://github.com/MinePlayersPE)
+ * [KelbyOne] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+ * [LnkIE] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+ * [MainStreaming] Add extractor by [coletdjnz](https://github.com/coletdjnz)
+ * [megatvcom] Add extractors by [zmousm](https://github.com/zmousm)
+ * [Newsy] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+ * [noodlemagazine] Add extractor by [trassshhub](https://github.com/trassshhub)
+ * [PokerGo] Add extractors by [Ashish0804](https://github.com/Ashish0804)
+ * [Pornez] Add extractor by [mozlima](https://github.com/mozlima)
+ * [PRX] Add Extractors by [coletdjnz](https://github.com/coletdjnz)
+ * [RTNews] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+ * [Rule34video] Add extractor by [trassshhub](https://github.com/trassshhub)
+ * [tvopengr] Add extractors by [zmousm](https://github.com/zmousm)
+ * [Vimm] Add extractor by [alerikaisattera](https://github.com/alerikaisattera)
+ * [glomex] Add extractors by [zmousm](https://github.com/zmousm)
+ * [instagram] Add story/highlight extractor by [u-spec-png](https://github.com/u-spec-png)
+ * [openrec] Add movie extractor by [Lesmiscore](https://github.com/Lesmiscore)
+ * [rai] Add Raiplaysound extractors by [nixxo](https://github.com/nixxo), [pukkandan](https://github.com/pukkandan)
+ * [aparat] Fix extractor
+ * [ard] Extract subtitles by [fstirlitz](https://github.com/fstirlitz)
+ * [BiliIntl] Add login by [MinePlayersPE](https://github.com/MinePlayersPE)
+ * [CeskaTelevize] Use `http` for manifests
+ * [CTVNewsIE] Add fallback for video search by [Ashish0804](https://github.com/Ashish0804)
+ * [dplay] Migrate DiscoveryPlusItaly to DiscoveryPlus by [timendum](https://github.com/timendum)
+ * [dplay] Re-structure DiscoveryPlus extractors
+ * [Dropbox] Support password protected files and more formats by [zenerdi0de](https://github.com/zenerdi0de)
+ * [facebook] Fix extraction from groups
+ * [facebook] Improve title and uploader extraction
+ * [facebook] Parse dash manifests
+ * [fox] Extract m3u8 from preview by [ischmidt20](https://github.com/ischmidt20)
+ * [funk] Support origin URLs
+ * [gfycat] Fix `uploader`
+ * [gfycat] Support embeds by [coletdjnz](https://github.com/coletdjnz)
+ * [hotstar] Add extractor args to ignore tags by [Ashish0804](https://github.com/Ashish0804)
+ * [hrfernsehen] Fix ardloader extraction by [CreaValix](https://github.com/CreaValix)
+ * [instagram] Fix username extraction for stories and highlights by [nyuszika7h](https://github.com/nyuszika7h)
+ * [kakao] Detect geo-restriction
+ * [line] Remove `tv.line.me` by [sian1468](https://github.com/sian1468)
+ * [mixch] Add `MixchArchiveIE` by [Lesmiscore](https://github.com/Lesmiscore)
+ * [mixcloud] Detect restrictions by [llacb47](https://github.com/llacb47)
+ * [NBCSports] Fix extraction of platform URLs by [ischmidt20](https://github.com/ischmidt20)
+ * [Nexx] Extract more metadata by [MinePlayersPE](https://github.com/MinePlayersPE)
+ * [Nexx] Support 3q CDN by [MinePlayersPE](https://github.com/MinePlayersPE)
+ * [pbs] de-prioritize AD formats
+ * [PornHub,YouTube] Refresh onion addresses by [unit193](https://github.com/unit193)
+ * [RedBullTV] Parse subtitles from manifest by [Ashish0804](https://github.com/Ashish0804)
+ * [streamcz] Fix extractor by [arkamar](https://github.com/arkamar), [pukkandan](https://github.com/pukkandan)
+ * [Ted] Rewrite extractor by [pukkandan](https://github.com/pukkandan), [trassshhub](https://github.com/trassshhub)
+ * [Theta] Fix valid URL by [alerikaisattera](https://github.com/alerikaisattera)
+ * [ThisOldHouseIE] Add support for premium videos by [Ashish0804](https://github.com/Ashish0804)
+ * [TikTok] Fix extraction for sigi-based webpages, add API fallback by [MinePlayersPE](https://github.com/MinePlayersPE)
+ * [TikTok] Pass cookies to formats, and misc fixes by [MinePlayersPE](https://github.com/MinePlayersPE)
+ * [TikTok] Extract captions, user thumbnail by [MinePlayersPE](https://github.com/MinePlayersPE)
+ * [TikTok] Change app version by [MinePlayersPE](https://github.com/MinePlayersPE), [llacb47](https://github.com/llacb47)
+ * [TVer] Extract message for unaired live by [Lesmiscore](https://github.com/Lesmiscore)
+ * [twitcasting] Refactor extractor by [Lesmiscore](https://github.com/Lesmiscore)
+ * [twitter] Fix video in quoted tweets
+ * [veoh] Improve extractor by [foghawk](https://github.com/foghawk)
+ * [vk] Capture `clip` URLs
+ * [vk] Fix VKUserVideosIE by [Ashish0804](https://github.com/Ashish0804)
+ * [vk] Improve `_VALID_URL` by [k3ns1n](https://github.com/k3ns1n)
+ * [VrtNU] Handle empty title by [pgaig](https://github.com/pgaig)
+ * [XVideos] Check HLS formats by [MinePlayersPE](https://github.com/MinePlayersPE)
+ * [yahoo:gyao] Improved playlist handling by [hyano](https://github.com/hyano)
+ * [youtube:tab] Extract more playlist metadata by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+ * [youtube:tab] Raise error on tab redirect by [krichbanana](https://github.com/krichbanana), [coletdjnz](https://github.com/coletdjnz)
+ * [youtube] Update Innertube clients by [coletdjnz](https://github.com/coletdjnz)
+ * [youtube] Detect live-stream embeds
+ * [youtube] Do not return `upload_date` for playlists
+ * [youtube] Extract channel subscriber count by [coletdjnz](https://github.com/coletdjnz)
+ * [youtube] Make invalid storyboard URL non-fatal
+ * [youtube] Enforce UTC, update innertube clients and tests by [coletdjnz](https://github.com/coletdjnz)
+ * [zdf] Add chapter extraction by [iw0nderhow](https://github.com/iw0nderhow)
+ * [zee5] Add geo-bypass
+
### 2021.12.27

* Avoid recursion error when re-extracting info

@@ -41,7 +41,7 @@ You can also find lists of all [contributors of yt-dlp](CONTRIBUTORS) and [autho
* Improved/fixed support for HiDive, HotStar, Hungama, LBRY, LinkedInLearning, Mxplayer, SonyLiv, TV2, Vimeo, VLive etc


- ## [Lesmicore](https://github.com/Lesmiscore) (nao20010128nao)
+ ## [Lesmiscore](https://github.com/Lesmiscore) (nao20010128nao)

**Bitcoin**: bc1qfd02r007cutfdjwjmyy9w23rjvtls6ncve7r3s
**Monacoin**: mona1q3tf7dzvshrhfe3md379xtvt2n22duhglv5dskr
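The 2022.03.08 entry "Add regex operator and quoting to format filters" above extends format filtering with quoted regex matches. A hedged sketch of what this enables when embedding yt-dlp (the `~=` operator and quoting follow the README's format-selection documentation; the exact filter string is illustrative):

```python
from yt_dlp import YoutubeDL

# Best video whose vcodec matches the quoted regex, else best combined format
opts = {'format': "bv*[vcodec~='^((he|a)vc|h26[45])']+ba/b"}
with YoutubeDL(opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```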
Makefile (4 changes)

@@ -14,9 +14,9 @@ pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites com
.PHONY: all clean install test tar pypi-files completions ot offlinetest codetest supportedsites

clean-test:
- rm -rf test/testdata/player-*.js tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \
+ rm -rf test/testdata/sigs/player-*.js tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \
*.frag.aria2 *.frag.urls *.info.json *.live_chat.json *.meta *.part* *.tmp *.temp *.unknown_video *.ytdl \
- *.3gp *.ape *.avi *.desktop *.flac *.flv *.jpeg *.jpg *.m4a *.m4v *.mhtml *.mkv *.mov *.mp3 \
+ *.3gp *.ape *.ass *.avi *.desktop *.flac *.flv *.jpeg *.jpg *.m4a *.m4v *.mhtml *.mkv *.mov *.mp3 \
*.mp4 *.ogg *.opus *.png *.sbv *.srt *.swf *.swp *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp
clean-dist:
rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ \
README.md (143 changes)

@@ -71,7 +71,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t

# NEW FEATURES

- * Based on **youtube-dl 2021.12.17 [commit/5014bd6](https://github.com/ytdl-org/youtube-dl/commit/5014bd67c22b421207b2650d4dc874b95b36dda1)** and **youtube-dlc 2020.11.11-3 [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl)
+ * Based on **youtube-dl 2021.12.17 [commit/5add3f4](https://github.com/ytdl-org/youtube-dl/commit/5add3f4373287e6346ca3551239edab549284db3)** and **youtube-dlc 2020.11.11-3 [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl)

* **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in youtube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API

@@ -88,7 +88,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t
* Redirect channel's home URL automatically to `/video` to preserve the old behaviour
* `255kbps` audio is extracted (if available) from youtube music when premium cookies are given
* Youtube music Albums, channels etc can be downloaded ([except self-uploaded music](https://github.com/yt-dlp/yt-dlp/issues/723))
- * Download livestreams from the start using `--live-from-start`
+ * Download livestreams from the start using `--live-from-start` (experimental)

* **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[+KEYRING][:PROFILE]`

@@ -110,9 +110,9 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t

* **Output template improvements**: Output templates can now have date-time formatting, numeric offsets, object traversal etc. See [output template](#output-template) for details. Even more advanced operations can also be done with the help of `--parse-metadata` and `--replace-in-metadata`

- * **Other new options**: Many new options have been added such as `--print`, `--wait-for-video`, `--sleep-requests`, `--convert-thumbnails`, `--write-link`, `--force-download-archive`, `--force-overwrites`, `--break-on-reject` etc
+ * **Other new options**: Many new options have been added such as `--concat-playlist`, `--print`, `--wait-for-video`, `--sleep-requests`, `--convert-thumbnails`, `--write-link`, `--force-download-archive`, `--force-overwrites`, `--break-on-reject` etc

- * **Improvements**: Regex and other operators in `--match-filter`, multiple `--postprocessor-args` and `--downloader-args`, faster archive checking, more [format selection options](#format-selection), merge multi-video/audio, multiple `--config-locations`, etc
+ * **Improvements**: Regex and other operators in `--format`/`--match-filter`, multiple `--postprocessor-args` and `--downloader-args`, faster archive checking, more [format selection options](#format-selection), merge multi-video/audio, multiple `--config-locations`, `--exec` at different stages, etc

* **Plugins**: Extractors and PostProcessors can be loaded from an external file. See [plugins](#plugins) for details
@@ -130,7 +130,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu
* The default [format sorting](#sorting-formats) is different from youtube-dl and prefers higher resolution and better codecs rather than higher bitrates. You can use the `--format-sort` option to change this to any order you prefer, or use `--compat-options format-sort` to use youtube-dl's sorting order
* The default format selector is `bv*+ba/b`. This means that if a combined video + audio format that is better than the best video-only format is found, the former will be preferred. Use `-f bv+ba/b` or `--compat-options format-spec` to revert this
* Unlike youtube-dlc, yt-dlp does not allow merging multiple audio/video streams into one file by default (since this conflicts with the use of `-f bv*+ba`). If needed, this feature must be enabled using `--audio-multistreams` and `--video-multistreams`. You can also use `--compat-options multistreams` to enable both
- * `--ignore-errors` is enabled by default. Use `--abort-on-error` or `--compat-options abort-on-error` to abort on errors instead
+ * `--no-abort-on-error` is enabled by default. Use `--abort-on-error` or `--compat-options abort-on-error` to abort on errors instead
* When writing metadata files such as thumbnails, description or infojson, the same information (if available) is also written for playlists. Use `--no-write-playlist-metafiles` or `--compat-options no-playlist-metafiles` to not write these files
* `--add-metadata` attaches the `infojson` to `mkv` files in addition to writing the metadata when used with `--write-info-json`. Use `--no-embed-info-json` or `--compat-options no-attach-info-json` to revert this
* Some metadata are embedded into different fields when using `--add-metadata` as compared to youtube-dl. Most notably, `comment` field contains the `webpage_url` and `synopsis` contains the `description`. You can [use `--parse-metadata`](#modifying-metadata) to modify this to your liking or use `--compat-options embed-metadata` to revert this
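When embedding yt-dlp, the same defaults listed above can also be reverted per-option instead of via `--compat-options`; a small sketch using documented `YoutubeDL` parameters (the combination shown is illustrative):

```python
from yt_dlp import YoutubeDL

opts = {
    'format': 'bv+ba/b',    # youtube-dl style selector instead of bv*+ba/b
    'ignoreerrors': False,  # abort on errors, like --abort-on-error
}
with YoutubeDL(opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```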
@@ -267,7 +267,8 @@ While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly
* [**pycryptodomex**](https://github.com/Legrandin/pycryptodome) - For decrypting AES-128 HLS streams and various other data. Licensed under [BSD2](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst)
* [**websockets**](https://github.com/aaugustin/websockets) - For downloading over websocket. Licensed under [BSD3](https://github.com/aaugustin/websockets/blob/main/LICENSE)
* [**secretstorage**](https://github.com/mitya57/secretstorage) - For accessing the Gnome keyring while decrypting cookies of Chromium-based browsers on Linux. Licensed under [BSD](https://github.com/mitya57/secretstorage/blob/master/LICENSE)
- * [**AtomicParsley**](https://github.com/wez/atomicparsley) - For embedding thumbnail in mp4/m4a if mutagen is not present. Licensed under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING)
+ * [**AtomicParsley**](https://github.com/wez/atomicparsley) - For embedding thumbnail in mp4/m4a if mutagen/ffmpeg cannot. Licensed under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING)
+ * [**brotli**](https://github.com/google/brotli) or [**brotlicffi**](https://github.com/python-hyper/brotlicffi) - [Brotli](https://en.wikipedia.org/wiki/Brotli) content encoding support. Both licensed under MIT <sup>[1](https://github.com/google/brotli/blob/master/LICENSE) [2](https://github.com/python-hyper/brotlicffi/blob/master/LICENSE) </sup>
* [**rtmpdump**](http://rtmpdump.mplayerhq.hu) - For downloading `rtmp` streams. ffmpeg will be used as a fallback. Licensed under [GPLv2+](http://rtmpdump.mplayerhq.hu)
* [**mplayer**](http://mplayerhq.hu/design7/info.html) or [**mpv**](https://mpv.io) - For downloading `rstp` streams. ffmpeg will be used as a fallback. Licensed under [GPLv2+](https://github.com/mpv-player/mpv/blob/master/Copyright)
* [**phantomjs**](https://github.com/ariya/phantomjs) - Used in extractors where javascript needs to be run. Licensed under [BSD3](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD)
@@ -278,13 +279,14 @@ To use or redistribute the dependencies, you must agree to their respective lice
 
 The Windows and MacOS standalone release binaries are already built with the python interpreter, mutagen, pycryptodomex and websockets included.
 
+<!-- TODO: ffmpeg has merged this patch. Remove this note once there is new release -->
+**Note**: There are some regressions in newer ffmpeg versions that cause various issues when used alongside yt-dlp. Since ffmpeg is such an important dependency, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds#ffmpeg-static-auto-builds) with patches for these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specific issues solved by these builds
 
 ## COMPILE
 
 **For Windows**:
-To build the Windows executable, you must have pyinstaller (and optionally mutagen, pycryptodomex, websockets). Once you have all the necessary dependencies installed, (optionally) build lazy extractors using `devscripts/make_lazy_extractors.py`, and then just run `pyinst.py`. The executable will be built for the same architecture (32/64 bit) as the python used to build it.
+To build the Windows executable, you must have pyinstaller (and any of yt-dlp's optional dependencies if needed). Once you have all the necessary dependencies installed, (optionally) build lazy extractors using `devscripts/make_lazy_extractors.py`, and then just run `pyinst.py`. The executable will be built for the same architecture (32/64 bit) as the python used to build it.
 
     py -m pip install -U pyinstaller -r requirements.txt
     py devscripts/make_lazy_extractors.py
@@ -379,8 +381,9 @@ You can also fork the project on github and run your fork's [build workflow](.gi
     --proxy URL                      Use the specified HTTP/HTTPS/SOCKS proxy.
                                      To enable SOCKS proxy, specify a proper
                                      scheme. For example
-                                     socks5://127.0.0.1:1080/. Pass in an empty
-                                     string (--proxy "") for direct connection
+                                     socks5://user:pass@127.0.0.1:1080/. Pass in
+                                     an empty string (--proxy "") for direct
+                                     connection
     --socket-timeout SECONDS         Time to wait before giving up, in seconds
     --source-address IP              Client-side IP address to bind to
     -4, --force-ipv4                 Make all connections via IPv4
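
For instance, to route all traffic through a local authenticated SOCKS5 proxy (illustrative credentials and port, taken from the description above):

```
$ yt-dlp --proxy "socks5://user:pass@127.0.0.1:1080/" "URL"
```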
@@ -393,7 +396,7 @@ You can also fork the project on github and run your fork's [build workflow](.gi
                                      option is not present) is used for the
                                      actual downloading
     --geo-bypass                     Bypass geographic restriction via faking
-                                     X-Forwarded-For HTTP header
+                                     X-Forwarded-For HTTP header (default)
     --no-geo-bypass                  Do not bypass geographic restriction via
                                      faking X-Forwarded-For HTTP header
     --geo-bypass-country CODE        Force bypass geographic restriction with
@@ -604,11 +607,11 @@ You can also fork the project on github and run your fork's [build workflow](.gi
                                      --write-description etc. (default)
     --no-write-playlist-metafiles    Do not write playlist metadata when using
                                      --write-info-json, --write-description etc.
-    --clean-infojson                 Remove some private fields such as
+    --clean-info-json                Remove some private fields such as
                                      filenames from the infojson. Note that it
                                      could still contain some personal
                                      information (default)
-    --no-clean-infojson              Write all fields to the infojson
+    --no-clean-info-json             Write all fields to the infojson
     --write-comments                 Retrieve video comments to be placed in the
                                      infojson. The comments are fetched even
                                      without this option if the extraction is
@@ -686,6 +689,12 @@ You can also fork the project on github and run your fork's [build workflow](.gi
                                      Implies --quiet and --simulate (unless
                                      --no-simulate is used). This option can be
                                      used multiple times
+    --print-to-file [WHEN:]TEMPLATE FILE
+                                     Append given template to the file. The
+                                     values of WHEN and TEMPLATE are same as
+                                     that of --print. FILE uses the same syntax
+                                     as the output template. This option can be
+                                     used multiple times
     -j, --dump-json                  Quiet, but print JSON information for each
                                      video. Simulate unless --no-simulate is
                                      used. See "OUTPUT TEMPLATE" for a
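
As a sketch of the new option, the following appends a title/URL line to a log file once each video has been moved to its final location (the template and filename are illustrative):

```
$ yt-dlp --print-to-file "after_move:%(title)s - %(webpage_url)s" downloaded.txt "URL"
```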
@@ -723,13 +732,13 @@ You can also fork the project on github and run your fork's [build workflow](.gi
 
 ## Workarounds:
     --encoding ENCODING              Force the specified encoding (experimental)
+    --legacy-server-connect          Explicitly allow HTTPS connection to
+                                     servers that do not support RFC 5746 secure
+                                     renegotiation
     --no-check-certificates          Suppress HTTPS certificate validation
     --prefer-insecure                Use an unencrypted connection to retrieve
                                      information about the video (Currently
                                      supported only for YouTube)
-    --user-agent UA                  Specify a custom user agent
-    --referer URL                    Specify a custom referer, use if the video
-                                     access is restricted to one domain
     --add-header FIELD:VALUE         Specify a custom HTTP header and its value,
                                      separated by a colon ":". You can use this
                                      option multiple times
@@ -941,7 +950,7 @@ You can also fork the project on github and run your fork's [build workflow](.gi
                                      (currently supported: srt|vtt|ass|lrc)
                                      (Alias: --convert-subtitles)
     --convert-thumbnails FORMAT      Convert the thumbnails to another format
-                                     (currently supported: jpg|png)
+                                     (currently supported: jpg|png|webp)
     --split-chapters                 Split video into multiple files based on
                                      internal chapters. The "chapter:" prefix
                                      can be used with "--paths" and "--output"
@@ -972,15 +981,17 @@ You can also fork the project on github and run your fork's [build workflow](.gi
                                      semicolon ";" delimited list of NAME=VALUE.
                                      The "when" argument determines when the
                                      postprocessor is invoked. It can be one of
-                                     "pre_process" (after extraction),
-                                     "before_dl" (before video download),
-                                     "post_process" (after video download;
-                                     default), "after_move" (after moving file
-                                     to their final locations), "after_video"
-                                     (after downloading and processing all
-                                     formats of a video), or "playlist" (end of
-                                     playlist). This option can be used multiple
-                                     times to add different postprocessors
+                                     "pre_process" (after video extraction),
+                                     "after_filter" (after video passes filter),
+                                     "before_dl" (before each video download),
+                                     "post_process" (after each video download;
+                                     default), "after_move" (after moving the
+                                     video file to its final location),
+                                     "after_video" (after downloading and
+                                     processing all formats of a video), or
+                                     "playlist" (at end of playlist). This
+                                     option can be used multiple times to add
+                                     different postprocessors
 
 ## SponsorBlock Options:
     Make chapter entries for, or remove various segments (sponsor,
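
To illustrate the new `after_filter` stage, a plugin postprocessor could be scheduled like this (`MyPlugin` is a hypothetical plugin name):

```
$ yt-dlp --use-postprocessor "MyPlugin:when=after_filter" "URL"
```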
@@ -1040,7 +1051,7 @@ You can configure yt-dlp by placing any supported command line option to a confi
 
 1. **Main Configuration**: The file given by `--config-location`
 1. **Portable Configuration**: `yt-dlp.conf` in the same directory as the bundled binary. If you are running from source-code (`<root dir>/yt_dlp/__main__.py`), the root directory is used instead.
-1. **Home Configuration**: `yt-dlp.conf` in the home path given by `-P "home:<path>"`, or in the current directory if no such path is given
+1. **Home Configuration**: `yt-dlp.conf` in the home path given by `-P`, or in the current directory if no such path is given
 1. **User Configuration**:
     * `%XDG_CONFIG_HOME%/yt-dlp/config` (recommended on Linux/macOS)
     * `%XDG_CONFIG_HOME%/yt-dlp.conf`
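
A configuration file is simply these command-line options, one per line. An illustrative `yt-dlp.conf` using the home path mentioned above:

```
# Save everything under ~/Videos with a readable filename
-P "home:~/Videos"
-o "%(title)s.%(ext)s"
--embed-metadata
```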
@@ -1127,12 +1138,13 @@ To summarize, the general syntax for a field is:
 %(name[.keys][addition][>strf][,alternate][&replacement][|default])[flags][width][.precision][length]type
 ```
 
-Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different file types supported are `subtitle`, `thumbnail`, `description`, `annotation` (deprecated), `infojson`, `link`, `pl_thumbnail`, `pl_description`, `pl_infojson`, `chapter`, `pl_video`. For example, `-o "%(title)s.%(ext)s" -o "thumbnail:%(title)s\%(title)s.%(ext)s"` will put the thumbnails in a folder with the same name as the video. If any of the templates (except default) is empty, that type of file will not be written. Eg: `--write-thumbnail -o "thumbnail:"` will write thumbnails only for playlists and not for video.
+Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different file types supported are `subtitle`, `thumbnail`, `description`, `annotation` (deprecated), `infojson`, `link`, `pl_thumbnail`, `pl_description`, `pl_infojson`, `chapter`, `pl_video`. For example, `-o "%(title)s.%(ext)s" -o "thumbnail:%(title)s\%(title)s.%(ext)s"` will put the thumbnails in a folder with the same name as the video. If any of the templates is empty, that type of file will not be written. Eg: `--write-thumbnail -o "thumbnail:"` will write thumbnails only for playlists and not for video.
 
 The available fields are:
 
 - `id` (string): Video identifier
 - `title` (string): Video title
+- `fulltitle` (string): Video title ignoring live timestamp and generic title
 - `url` (string): Video URL
 - `ext` (string): Video filename extension
 - `alt_title` (string): A secondary title of the video
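
For example, to keep thumbnails in their own folder while videos use the default template (illustrative paths):

```
$ yt-dlp --write-thumbnail -o "%(title)s.%(ext)s" -o "thumbnail:thumbs/%(title)s.%(ext)s" "URL"
```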
@@ -1188,16 +1200,16 @@ The available fields are:
 - `protocol` (string): The protocol that will be used for the actual download
 - `extractor` (string): Name of the extractor
 - `extractor_key` (string): Key name of the extractor
-- `epoch` (numeric): Unix epoch when creating the file
+- `epoch` (numeric): Unix epoch of when the information extraction was completed
 - `autonumber` (numeric): Number that will be increased with each download, starting at `--autonumber-start`
 - `video_autonumber` (numeric): Number that will be increased with each video
 - `n_entries` (numeric): Total number of extracted items in the playlist
-- `playlist` (string): Name or id of the playlist that contains the video
-- `playlist_id` (string): Identifier of the playlist that contains the video
-- `playlist_title` (string): Name of the playlist that contains the video
+- `playlist` (string): `playlist_id` or `playlist_title`
+- `playlist_count` (numeric): Total number of items in the playlist. May not be known if entire playlist is not extracted
 - `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the final index
 - `playlist_autonumber` (numeric): Position of the video in the playlist download queue padded with leading zeros according to the total length of the playlist
+- `playlist_id` (string): Playlist identifier
+- `playlist_title` (string): Playlist title
 - `playlist_uploader` (string): Full name of the playlist uploader
 - `playlist_uploader_id` (string): Nickname or id of the playlist uploader
 - `webpage_url` (string): A URL to the video webpage which if given to yt-dlp should allow to get the same result again
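
These fields make per-playlist organization straightforward, e.g. (illustrative):

```
$ yt-dlp -o "%(playlist_title)s/%(playlist_index)s - %(title)s.%(ext)s" "PLAYLIST_URL"
```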
@@ -1388,7 +1400,7 @@ The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `
 - `asr`: Audio sampling rate in Hertz
 - `fps`: Frame rate
 
-Also filtering works for comparisons `=` (equals), `^=` (starts with), `$=` (ends with), `*=` (contains) and following string meta fields:
+Also filtering works for comparisons `=` (equals), `^=` (starts with), `$=` (ends with), `*=` (contains), `~=` (matches regex) and following string meta fields:
 
 - `ext`: File extension
 - `acodec`: Name of the audio codec in use
@@ -1398,7 +1410,7 @@ Also filtering work for comparisons `=` (equals), `^=` (starts with), `$=` (ends
 - `format_id`: A short description of the format
 - `language`: Language code
 
-Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain).
+Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain). The comparand of a string comparison needs to be quoted with either double or single quotes if it contains spaces or special characters other than `._-`.
 
 Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the website. Any other field made available by the extractor can also be used for filtering.
 
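
Combining the comparisons above, one could select the best mp4 video whose codec matches a regex (an illustrative filter):

```
$ yt-dlp -f "bv*[ext=mp4][vcodec~='^avc1']+ba" "URL"
```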
@@ -1541,8 +1553,9 @@ $ yt-dlp -S "proto"
 
 
 
-# Download the best video with h264 codec, or the best video if there is no such video
-$ yt-dlp -f "(bv*+ba/b)[vcodec^=avc1] / (bv*+ba/b)"
+# Download the best video with either h264 or h265 codec,
+# or the best video if there is no such video
+$ yt-dlp -f "(bv*[vcodec~='^((he|a)vc|h26[45])']+ba) / (bv*+ba/b)"
 
 # Download the best video with best codec no better than h264,
 # or the best video with worst codec if there is no such video
@@ -1587,25 +1600,28 @@ This option also has a few special uses:
 * You can download an additional URL based on the metadata of the currently downloaded video. To do this, set the field `additional_urls` to the URL that you want to download. Eg: `--parse-metadata "description:(?P<additional_urls>https?://www\.vimeo\.com/\d+)"` will download the first vimeo video found in the description
 * You can use this to change the metadata that is embedded in the media file. To do this, set the value of the corresponding field with a `meta_` prefix. For example, any value you set to the `meta_description` field will be added to the `description` field in the file; you can use this to set a different "description" and "synopsis". To modify the metadata of individual streams, use the `meta<n>_` prefix (Eg: `meta1_language`). Any value set to the `meta_` field will overwrite all default values.
 
+**Note**: Metadata modification happens before format selection, post-extraction and other post-processing operations. Some fields may be added or changed during these steps, overriding your changes.
+
 For reference, these are the fields yt-dlp adds by default to the file metadata:
 
-Metadata fields|From
-:---|:---
-`title`|`track` or `title`
-`date`|`upload_date`
-`description`, `synopsis`|`description`
-`purl`, `comment`|`webpage_url`
-`track`|`track_number`
-`artist`|`artist`, `creator`, `uploader` or `uploader_id`
-`genre`|`genre`
-`album`|`album`
-`album_artist`|`album_artist`
-`disc`|`disc_number`
-`show`|`series`
-`season_number`|`season_number`
-`episode_id`|`episode` or `episode_id`
-`episode_sort`|`episode_number`
-`language` of each stream|From the format's `language`
+Metadata fields             | From
+:---------------------------|:-------------------------------------------------
+`title`                     | `track` or `title`
+`date`                      | `upload_date`
+`description`, `synopsis`   | `description`
+`purl`, `comment`           | `webpage_url`
+`track`                     | `track_number`
+`artist`                    | `artist`, `creator`, `uploader` or `uploader_id`
+`genre`                     | `genre`
+`album`                     | `album`
+`album_artist`              | `album_artist`
+`disc`                      | `disc_number`
+`show`                      | `series`
+`season_number`             | `season_number`
+`episode_id`                | `episode` or `episode_id`
+`episode_sort`              | `episode_number`
+`language` of each stream   | the format's `language`
 
 **Note**: The file format may not support some of these fields
 
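To make the mapping concrete, custom values can be parsed from existing fields and embedded like this (illustrative patterns):

```
# Interpret the title as "Artist - Track" and embed both fields
$ yt-dlp --parse-metadata "title:%(artist)s - %(title)s" --embed-metadata "URL"

# Copy the upload date into the embedded "comment" field
$ yt-dlp --parse-metadata "upload_date:(?P<meta_comment>.+)" --embed-metadata "URL"
```
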
@@ -1651,6 +1667,7 @@ The following extractors use this feature:
 
 #### youtubetab (YouTube playlists, channels, feeds, etc.)
 * `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details)
+* `approximate_date`: Extract approximate `upload_date` in flat-playlist. This may cause date-based filters to be slightly off
 
 #### funimation
 * `language`: Languages to extract. Eg: `funimation:language=english,japanese`
@@ -1660,6 +1677,11 @@ The following extractors use this feature:
 * `language`: Languages to extract. Eg: `crunchyroll:language=jaJp`
 * `hardsub`: Which hard-sub versions to extract. Eg: `crunchyroll:hardsub=None,enUS`
 
+#### crunchyroll:beta
+* `format`: Which stream type(s) to extract. Default is `adaptive_hls`. Eg: `crunchyrollbeta:format=vo_adaptive_hls`
+    * Potentially useful values include `adaptive_hls`, `adaptive_dash`, `vo_adaptive_hls`, `vo_adaptive_dash`, `download_hls`, `trailer_hls`, `trailer_dash`
+* `hardsub`: Preference order for which hardsub versions to extract. Default is `None` (no hardsubs). Eg: `crunchyrollbeta:hardsub=en-US,None`
+
 #### vikichannel
 * `video_types`: Types of videos to download - one or more of `episodes`, `movies`, `clips`, `trailers`
 
@@ -1674,6 +1696,14 @@ The following extractors use this feature:
 * `vcodec`: vcodec to ignore - one or more of `h264`, `h265`, `dvh265`
 * `dr`: dynamic range to ignore - one or more of `sdr`, `hdr10`, `dv`
 
+#### tiktok
+* `app_version`: App version to call mobile APIs with - should be set along with `manifest_app_version`. (e.g. `20.2.1`)
+* `manifest_app_version`: Numeric app version to call mobile APIs with. (e.g. `221`)
+
+#### rokfinchannel
+* `tab`: Which tab to download. One of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`. (E.g. `rokfinchannel:tab=streams`)
+
 
 NOTE: These options may be changed/removed in the future without concern for backward compatibility
 
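For reference, extractor arguments are passed on the command line like this (values taken from the crunchyroll:beta entry above; illustrative):

```
$ yt-dlp --extractor-args "crunchyrollbeta:format=vo_adaptive_hls;hardsub=en-US,None" "URL"
```
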
 <!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->
@@ -1790,12 +1820,11 @@ ydl_opts = {
     }],
     'logger': MyLogger(),
     'progress_hooks': [my_hook],
+    # Add custom headers
+    'http_headers': {'Referer': 'https://www.google.com'}
 }
 
 
-# Add custom headers
-yt_dlp.utils.std_headers.update({'Referer': 'https://www.google.com'})
-
 # ℹ️ See the public functions in yt_dlp.YoutubeDL for other available functions.
 # Eg: "ydl.download", "ydl.download_with_info_file"
 with yt_dlp.YoutubeDL(ydl_opts) as ydl:
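
Put together, a minimal script using the per-instance header style shown above might look like this (a sketch; the URL is just an example):

```
import yt_dlp

ydl_opts = {
    # Headers now live in the options dict instead of the global std_headers
    'http_headers': {'Referer': 'https://www.google.com'},
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```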
@@ -1838,6 +1867,8 @@ While these options are redundant, they are still expected to be used due to the
     --reject-title REGEX             --match-filter "title !~= (?i)REGEX"
     --min-views COUNT                --match-filter "view_count >=? COUNT"
     --max-views COUNT                --match-filter "view_count <=? COUNT"
+    --user-agent UA                  --add-header "User-Agent:UA"
+    --referer URL                    --add-header "Referer:URL"
 
 
 #### Not recommended
@@ -1875,11 +1906,13 @@ These options are not intended to be used by the end-user
 These are aliases that are no longer documented for various reasons
 
     --avconv-location                --ffmpeg-location
+    --clean-infojson                 --clean-info-json
     --cn-verification-proxy URL      --geo-verification-proxy URL
     --dump-headers                   --print-traffic
     --dump-intermediate-pages        --dump-pages
     --force-write-download-archive   --force-write-archive
     --load-info                      --load-info-json
+    --no-clean-infojson              --no-clean-info-json
     --no-split-tracks                --no-split-chapters
     --no-write-srt                   --no-write-subs
     --prefer-unsecure                --prefer-insecure
 
@@ -75,21 +75,21 @@ def filter_options(readme):
     section = re.search(r'(?sm)^# USAGE AND OPTIONS\n.+?(?=^# )', readme).group(0)
     options = '# OPTIONS\n'
     for line in section.split('\n')[1:]:
-        if line.lstrip().startswith('-'):
-            split = re.split(r'\s{2,}', line.lstrip())
-            # Description string may start with `-` as well. If there is
-            # only one piece then it's a description but not an option.
-            if len(split) > 1:
-                option, description = split
-                split_option = option.split(' ')
+        mobj = re.fullmatch(r'''(?x)
+                \s{4}(?P<opt>-(?:,\s|[^\s])+)
+                (?:\s(?P<meta>(?:[^\s]|\s(?!\s))+))?
+                (\s{2,}(?P<desc>.+))?
+            ''', line)
+        if not mobj:
+            options += f'{line.lstrip()}\n'
+            continue
+        option, metavar, description = mobj.group('opt', 'meta', 'desc')
 
-                if not split_option[-1].startswith('-'):  # metavar
-                    option = ' '.join(split_option[:-1] + [f'*{split_option[-1]}*'])
-
-                # Pandoc's definition_lists. See http://pandoc.org/README.html
-                options += f'\n{option}\n: {description}\n'
-                continue
-        options += line.lstrip() + '\n'
+        # Pandoc's definition_lists. See http://pandoc.org/README.html
+        option = f'{option} *{metavar}*' if metavar else option
+        description = f'{description}\n' if description else ''
+        options += f'\n{option}\n: {description}'
+        continue
 
     return readme.replace(section, options, 1)
 
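To see what the new pattern extracts from a typical options line, here is a small standalone check (the sample line is made up for illustration):

```
import re

line = '    -q, --quiet                      Activate quiet mode'
mobj = re.fullmatch(r'''(?x)
    \s{4}(?P<opt>-(?:,\s|[^\s])+)
    (?:\s(?P<meta>(?:[^\s]|\s(?!\s))+))?
    (\s{2,}(?P<desc>.+))?
''', line)
# Prints ('-q, --quiet', None, 'Activate quiet mode')
print(mobj.group('opt', 'meta', 'desc'))
```
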
@@ -74,7 +74,7 @@ def version_to_list(version):
 
 
 def dependency_options():
-    dependencies = [pycryptodome_module(), 'mutagen'] + collect_submodules('websockets')
+    dependencies = [pycryptodome_module(), 'mutagen', 'brotli'] + collect_submodules('websockets')
     excluded_modules = ['test', 'ytdlp_plugins', 'youtube-dl', 'youtube-dlc']
 
     yield from (f'--hidden-import={module}' for module in dependencies)
 
@@ -1,3 +1,5 @@
 mutagen
 pycryptodomex
 websockets
+brotli; platform_python_implementation=='CPython'
+brotlicffi; platform_python_implementation!='CPython'
@@ -21,9 +21,9 @@ DESCRIPTION = 'A youtube-dl fork with additional features and patches'
 LONG_DESCRIPTION = '\n\n'.join((
     'Official repository: <https://github.com/yt-dlp/yt-dlp>',
     '**PS**: Some links in this document will not work since this is a copy of the README.md from Github',
-    open('README.md', 'r', encoding='utf-8').read()))
+    open('README.md').read()))
 
-REQUIREMENTS = ['mutagen', 'pycryptodomex', 'websockets']
+REQUIREMENTS = open('requirements.txt').read().splitlines()
 
 
 if sys.argv[1:2] == ['py2exe']:
 
@@ -3,7 +3,6 @@
|
||||
- **17live:clip**
|
||||
- **1tv**: Первый канал
|
||||
- **20min**
|
||||
- **220.ro**
|
||||
- **23video**
|
||||
- **247sports**
|
||||
- **24video**
|
||||
@@ -11,7 +10,6 @@
|
||||
- **3sat**
|
||||
- **4tube**
|
||||
- **56.com**
|
||||
- **5min**
|
||||
- **6play**
|
||||
- **7plus**
|
||||
- **8tracks**
|
||||
@@ -26,6 +24,8 @@
|
||||
- **abcnews:video**
|
||||
- **abcotvs**: ABC Owned Television Stations
|
||||
- **abcotvs:clips**
|
||||
- **AbemaTV**
|
||||
- **AbemaTVTitle**
|
||||
- **AcademicEarth:Course**
|
||||
- **acast**
|
||||
- **acast:channel**
|
||||
@@ -41,11 +41,14 @@
|
||||
- **aenetworks:collection**
|
||||
- **aenetworks:show**
|
||||
- **afreecatv**: afreecatv.com
|
||||
- **afreecatv:live**: afreecatv.com
|
||||
- **AirMozilla**
|
||||
- **AliExpressLive**
|
||||
- **AlJazeera**
|
||||
- **Allocine**
|
||||
- **AlphaPorno**
|
||||
- **Alsace20TV**
|
||||
- **Alsace20TVEmbed**
|
||||
- **Alura**
|
||||
- **AluraCourse**
|
||||
- **Amara**
|
||||
@@ -53,11 +56,15 @@
|
||||
- **AMCNetworks**
|
||||
- **AmericasTestKitchen**
|
||||
- **AmericasTestKitchenSeason**
|
||||
- **AmHistoryChannel**
|
||||
- **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||
- **AnimalPlanet**
|
||||
- **AnimeLab**
|
||||
- **AnimeLabShows**
|
||||
- **AnimeOnDemand**
|
||||
- **ant1newsgr:article**: ant1news.gr articles
|
||||
- **ant1newsgr:embed**: ant1news.gr embedded videos
|
||||
- **ant1newsgr:watch**: ant1news.gr videos
|
||||
- **Anvato**
|
||||
- **aol.com**: Yahoo screen and movies
|
||||
- **APA**
|
||||
@@ -75,6 +82,7 @@
|
||||
- **Arkena**
|
||||
- **arte.sky.it**
|
||||
- **ArteTV**
|
||||
- **ArteTVCategory**
|
||||
- **ArteTVEmbed**
|
||||
- **ArteTVPlaylist**
|
||||
- **AsianCrush**
|
||||
@@ -99,8 +107,8 @@
|
||||
- **bandaichannel**
|
||||
- **Bandcamp**
|
||||
- **Bandcamp:album**
|
||||
- **Bandcamp:user**
|
||||
- **Bandcamp:weekly**
|
||||
- **BandcampMusic**
|
||||
- **bangumi.bilibili.com**: BiliBili番剧
|
||||
- **BannedVideo**
|
||||
- **bbc**: BBC
|
||||
@@ -122,6 +130,7 @@
|
||||
- **bfmtv:live**
|
||||
- **BibelTV**
|
||||
- **Bigflix**
|
||||
- **Bigo**
|
||||
- **Bild**: Bild.de
|
||||
- **BiliBili**
|
||||
- **Bilibili category extractor**
|
||||
@@ -162,6 +171,8 @@
|
||||
- **BuzzFeed**
|
||||
- **BYUtv**
|
||||
- **CableAV**
|
||||
- **Callin**
|
||||
- **Caltrans**
|
||||
- **CAM4**
|
||||
- **Camdemy**
|
||||
- **CamdemyFolder**
|
||||
@@ -225,18 +236,24 @@
|
||||
- **ComedyCentralTV**
|
||||
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
||||
- **CONtv**
|
||||
- **CookingChannel**
|
||||
- **Corus**
|
||||
- **Coub**
|
||||
- **CozyTV**
|
||||
- **cp24**
|
||||
- **cpac**
|
||||
- **cpac:playlist**
|
||||
- **Cracked**
|
||||
- **Crackle**
|
||||
- **CrooksAndLiars**
|
||||
- **CrowdBunker**
|
||||
- **CrowdBunkerChannel**
|
||||
- **crunchyroll**
|
||||
- **crunchyroll:beta**
|
||||
- **crunchyroll:playlist**
|
||||
- **crunchyroll:playlist:beta**
|
||||
- **CSpan**: C-SPAN
|
||||
- **CSpanCongress**
|
||||
- **CtsNews**: 華視新聞
|
||||
- **CTV**
|
||||
- **CTVNews**
|
||||
@@ -246,6 +263,7 @@
|
||||
- **curiositystream:collections**
|
||||
- **curiositystream:series**
|
||||
- **CWTV**
|
||||
- **Daftsex**
|
||||
- **DagelijkseKost**: dagelijksekost.een.be
|
||||
- **DailyMail**
|
||||
- **dailymotion**
|
||||
@@ -257,26 +275,27 @@
|
||||
- **daum.net:clip**
|
||||
- **daum.net:playlist**
|
||||
- **daum.net:user**
|
||||
- **daystar:clip**
|
||||
- **DBTV**
|
||||
- **DctpTv**
|
||||
- **DeezerAlbum**
|
||||
- **DeezerPlaylist**
|
||||
- **defense.gouv.fr**
|
||||
- **democracynow**
|
||||
- **DestinationAmerica**
|
||||
- **DHM**: Filmarchiv - Deutsches Historisches Museum
|
||||
- **Digg**
|
||||
- **DigitalConcertHall**: DigitalConcertHall extractor
|
||||
- **DigitallySpeaking**
|
||||
- **Digiteka**
|
||||
- **Discovery**
|
||||
- **DiscoveryGo**
|
||||
- **DiscoveryGoPlaylist**
|
||||
- **DiscoveryLife**
|
||||
- **DiscoveryNetworksDe**
|
||||
- **DiscoveryPlus**
|
||||
- **DiscoveryPlusIndia**
|
||||
- **DiscoveryPlusIndiaShow**
|
||||
- **DiscoveryPlusItaly**
|
||||
- **DiscoveryPlusItalyShow**
|
||||
- **DiscoveryVR**
|
||||
- **Disney**
|
||||
- **DIYNetwork**
|
||||
- **dlive:stream**
|
||||
@@ -288,6 +307,7 @@
|
||||
- **DouyuTV**: 斗鱼
|
||||
- **DPlay**
|
||||
- **DRBonanza**
|
||||
- **Drooble**
|
||||
- **Dropbox**
|
||||
- **Dropout**
|
||||
- **DropoutSeason**
|
||||
@@ -324,12 +344,16 @@
|
||||
- **Eporner**
|
||||
- **EroProfile**
|
||||
- **EroProfile:album**
|
||||
- **ertflix**: ERTFLIX videos
|
||||
- **ertflix:codename**: ERTFLIX videos by codename
|
||||
- **ertwebtv:embed**: ert.gr webtv embedded videos
|
||||
- **Escapist**
|
||||
- **ESPN**
|
||||
- **ESPNArticle**
|
||||
- **ESPNCricInfo**
|
||||
- **EsriVideo**
|
||||
- **Europa**
|
||||
- **EuropeanTour**
|
||||
- **EUScreen**
|
||||
- **EWETV**
|
||||
- **ExpoTV**
|
||||
@@ -343,6 +367,7 @@
|
||||
- **faz.net**
|
||||
- **fc2**
|
||||
- **fc2:embed**
|
||||
- **fc2:live**
|
||||
- **Fczenit**
|
||||
- **Filmmodu**
|
||||
- **filmon**
|
||||
@@ -352,6 +377,7 @@
|
||||
- **FiveTV**
|
||||
- **Flickr**
|
||||
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
||||
- **FoodNetwork**
|
||||
- **FootyRoom**
|
||||
- **Formula1**
|
||||
- **FOX**
|
||||
@@ -361,6 +387,7 @@
|
||||
- **foxnews**: Fox News and Fox Business Video
|
||||
- **foxnews:article**
|
||||
- **FoxSports**
|
||||
- **fptplay**: fptplay.vn
|
||||
- **FranceCulture**
|
||||
- **FranceInter**
|
||||
- **FranceTV**
|
||||
@@ -368,7 +395,6 @@
|
||||
- **FranceTVSite**
|
||||
- **Freesound**
|
||||
- **freespeech.org**
|
||||
- **FreshLive**
|
||||
- **FrontendMasters**
|
||||
- **FrontendMastersCourse**
|
||||
- **FrontendMastersLesson**
|
||||
@@ -400,6 +426,7 @@
|
||||
- **gem.cbc.ca:playlist**
|
||||
- **generic**: Generic downloader that works on some sites
|
||||
- **Gettr**
|
||||
- **GettrStreaming**
|
||||
- **Gfycat**
|
||||
- **GiantBomb**
|
||||
- **Giga**
|
||||
@@ -407,7 +434,10 @@
|
||||
- **Glide**: Glide mobile video messages (glide.me)
|
||||
- **Globo**
|
||||
- **GloboArticle**
|
||||
- **glomex**: Glomex videos
|
||||
- **glomex:embed**: Glomex embedded videos
|
||||
- **Go**
|
||||
- **GoDiscovery**
|
||||
- **GodTube**
|
||||
- **Gofile**
|
||||
- **Golem**
|
||||
@@ -429,6 +459,7 @@
|
||||
- **hetklokhuis**
|
||||
- **hgtv.com:show**
|
||||
- **HGTVDe**
|
||||
- **HGTVUsa**
|
||||
- **HiDive**
|
||||
- **HistoricFilms**
|
||||
- **history:player**
|
||||
@@ -437,7 +468,6 @@
|
||||
- **hitbox:live**
|
||||
- **HitRecord**
|
||||
- **hketv**: 香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau
|
||||
- **HornBunny**
|
||||
- **HotNewHipHop**
|
||||
- **hotstar**
|
||||
- **hotstar:playlist**
|
||||
@@ -470,15 +500,18 @@
|
||||
- **IndavideoEmbed**
|
||||
- **InfoQ**
|
||||
- **Instagram**
|
||||
- **instagram:story**
|
||||
- **instagram:tag**: Instagram hashtag search URLs
|
||||
- **instagram:user**: Instagram user profile
|
||||
- **InstagramIOS**: IOS instagram:// URL
|
||||
- **Internazionale**
|
||||
- **InternetVideoArchive**
|
||||
- **InvestigationDiscovery**
|
||||
- **IPrima**
|
||||
- **IPrimaCNN**
|
||||
- **iq.com**: International version of iQiyi
|
||||
- **iq.com:album**
|
||||
- **iqiyi**: 爱奇艺
|
||||
- **Ir90Tv**
|
||||
- **ITTF**
|
||||
- **ITV**
|
||||
- **ITVBTCC**
|
||||
@@ -495,11 +528,11 @@
|
||||
- **JWPlatform**
|
||||
- **Kakao**
|
||||
- **Kaltura**
|
||||
- **Kankan**
|
||||
- **Karaoketv**
|
||||
- **KarriereVideos**
|
||||
- **Katsomo**
|
||||
- **KeezMovies**
|
||||
- **KelbyOne**
|
||||
- **Ketnet**
|
||||
- **khanacademy**
|
||||
- **khanacademy:unit**
|
||||
@@ -545,7 +578,6 @@
|
||||
- **limelight:channel_list**
|
||||
- **LineLive**
|
||||
- **LineLiveChannel**
|
||||
- **LineTV**
|
||||
- **LinkedIn**
|
||||
- **linkedin:learning**
|
||||
- **linkedin:learning:course**
|
||||
@@ -554,6 +586,7 @@
|
||||
- **LiveJournal**
|
||||
- **livestream**
|
||||
- **livestream:original**
|
||||
- **Lnk**
|
||||
- **LnkGo**
|
||||
- **loc**: Library of Congress
|
||||
- **LocalNews8**
|
||||
@@ -566,6 +599,7 @@
|
||||
- **mailru**: Видео@Mail.Ru
|
||||
- **mailru:music**: Музыка@Mail.Ru
|
||||
- **mailru:music:search**: Музыка@Mail.Ru
|
||||
- **MainStreaming**: MainStreaming Player
|
||||
- **MallTV**
|
||||
- **mangomolo:live**
|
||||
- **mangomolo:video**
|
||||
@@ -592,6 +626,8 @@
|
||||
- **MediasiteNamedCatalog**
|
||||
- **Medici**
|
||||
- **megaphone.fm**: megaphone.fm embedded players
|
||||
- **megatvcom**: megatv.com videos
|
||||
- **megatvcom:embed**: megatv.com embedded videos
|
||||
- **Meipai**: 美拍
|
||||
- **MelonVOD**
|
||||
- **META**
|
||||
@@ -603,8 +639,9 @@
|
||||
- **MiaoPai**
|
||||
- **microsoftstream**: Microsoft Stream
|
||||
- **mildom**: Record ongoing live by specific user in Mildom
|
||||
- **mildom:clip**: Clip in Mildom
|
||||
- **mildom:user:vod**: Download all VODs from specific user in Mildom
|
||||
-- **mildom:vod**: Download a VOD in Mildom
+- **mildom:vod**: VOD in Mildom
|
||||
- **minds**
|
||||
- **minds:channel**
|
||||
- **minds:group**
|
||||
@@ -615,6 +652,7 @@
|
||||
- **mirrativ:user**
|
||||
- **MiTele**: mitele.es
|
||||
- **mixch**
|
||||
- **mixch:archive**
|
||||
- **mixcloud**
|
||||
- **mixcloud:playlist**
|
||||
- **mixcloud:user**
|
||||
@@ -646,7 +684,13 @@
|
||||
- **mtvservices:embedded**
|
||||
- **MTVUutisetArticle**
|
||||
- **MuenchenTV**: münchen.tv
|
||||
- **Murrtube**
|
||||
- **MurrtubeUser**: Murrtube user profile
|
||||
- **MuseScore**
|
||||
- **MusicdexAlbum**
|
||||
- **MusicdexArtist**
|
||||
- **MusicdexPlaylist**
|
||||
- **MusicdexSong**
|
||||
- **mva**: Microsoft Virtual Academy videos
|
||||
- **mva:course**: Microsoft Virtual Academy courses
|
||||
- **Mwave**
|
||||
@@ -704,14 +748,19 @@
|
||||
- **Newgrounds:playlist**
|
||||
- **Newgrounds:user**
|
||||
- **Newstube**
|
||||
- **Newsy**
|
||||
- **NextMedia**: 蘋果日報
|
||||
- **NextMediaActionNews**: 蘋果日報 - 動新聞
|
||||
- **NextTV**: 壹電視
|
||||
- **Nexx**
|
||||
- **NexxEmbed**
|
||||
- **NFB**
|
||||
- **NFHSNetwork**
|
||||
- **nfl.com** (Currently broken)
|
||||
- **nfl.com:article** (Currently broken)
|
||||
- **NhkForSchoolBangumi**
|
||||
- **NhkForSchoolProgramList**
|
||||
- **NhkForSchoolSubject**: Portal page for each school subjects, like Japanese (kokugo, 国語) or math (sansuu/suugaku or 算数・数学)
|
||||
- **NhkVod**
|
||||
- **NhkVodProgram**
|
||||
- **nhl.com**
|
||||
@@ -721,7 +770,10 @@
|
||||
- **nickelodeonru**
|
||||
- **nicknight**
|
||||
- **niconico**: ニコニコ動画
|
||||
-- **NiconicoPlaylist**
+- **niconico:history**: NicoNico user history. Requires cookies.
+- **niconico:playlist**
+- **niconico:series**
+- **niconico:tag**: NicoNico video tag URLs
|
||||
- **NiconicoUser**
|
||||
- **nicovideo:search**: Nico video search; "nicosearch:" prefix
|
||||
- **nicovideo:search:date**: Nico video search, newest first; "nicosearchdate:" prefix
|
||||
@@ -733,6 +785,7 @@
|
||||
- **NJPWWorld**: 新日本プロレスワールド
|
||||
- **NobelPrize**
|
||||
- **NonkTube**
|
||||
- **NoodleMagazine**
|
||||
- **Noovo**
|
||||
- **Normalboots**
|
||||
- **NosVideo**
|
||||
@@ -785,6 +838,7 @@
|
||||
- **OpencastPlaylist**
|
||||
- **openrec**
|
||||
- **openrec:capture**
|
||||
- **openrec:movie**
|
||||
- **OraTV**
|
||||
- **orf:burgenland**: Radio Burgenland
|
||||
- **orf:fm4**: radio FM4
|
||||
@@ -818,6 +872,7 @@
|
||||
- **PatreonUser**
|
||||
- **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
|
||||
- **PearVideo**
|
||||
- **PeekVids**
|
||||
- **peer.tv**
|
||||
- **PeerTube**
|
||||
- **PeerTube:Playlist**
|
||||
@@ -830,6 +885,7 @@
|
||||
- **PhilharmonieDeParis**: Philharmonie de Paris
|
||||
- **phoenix.de**
|
||||
- **Photobucket**
|
||||
- **Piapro**
|
||||
- **Picarto**
|
||||
- **PicartoVod**
|
||||
- **Piksel**
|
||||
@@ -849,6 +905,7 @@
|
||||
- **PlaysTV**
|
||||
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
|
||||
- **Playvid**
|
||||
- **PlayVids**
|
||||
- **Playwire**
|
||||
- **pluralsight**
|
||||
- **pluralsight:course**
|
||||
@@ -856,6 +913,8 @@
|
||||
- **podomatic**
|
||||
- **Pokemon**
|
||||
- **PokemonWatch**
|
||||
- **PokerGo**
|
||||
- **PokerGoCollection**
|
||||
- **PolsatGo**
|
||||
- **PolskieRadio**
|
||||
- **polskieradio:kierowcow**
|
||||
@@ -867,6 +926,7 @@
|
||||
- **PopcornTV**
|
||||
- **PornCom**
|
||||
- **PornerBros**
|
||||
- **Pornez**
|
||||
- **PornFlip**
|
||||
- **PornHd**
|
||||
- **PornHub**: PornHub and Thumbzilla
|
||||
@@ -881,6 +941,11 @@
|
||||
- **PressTV**
|
||||
- **ProjectVeritas**
|
||||
- **prosiebensat1**: ProSiebenSat.1 Digital
|
||||
- **PRXAccount**
|
||||
- **PRXSeries**
|
||||
- **prxseries:search**: PRX Series Search; "prxseries:" prefix
|
||||
- **prxstories:search**: PRX Stories Search; "prxstories:" prefix
|
||||
- **PRXStory**
|
||||
- **puhutv**
|
||||
- **puhutv:serie**
|
||||
- **Puls4**
|
||||
@@ -914,8 +979,9 @@
|
||||
- **RaiPlay**
|
||||
- **RaiPlayLive**
|
||||
- **RaiPlayPlaylist**
|
||||
-- **RaiPlayRadio**
-- **RaiPlayRadioPlaylist**
+- **RaiPlaySound**
+- **RaiPlaySoundLive**
+- **RaiPlaySoundPlaylist**
|
||||
- **RayWenderlich**
|
||||
- **RayWenderlichCourse**
|
||||
- **RBMARadio**
|
||||
@@ -944,18 +1010,23 @@
|
||||
- **RICE**
|
||||
- **RMCDecouverte**
|
||||
- **RockstarGames**
|
||||
- **Rokfin**
|
||||
- **rokfin:channel**
|
||||
- **rokfin:stack**
|
||||
- **RoosterTeeth**
|
||||
- **RoosterTeethSeries**
|
||||
- **RottenTomatoes**
|
||||
- **Roxwel**
|
||||
- **Rozhlas**
|
||||
- **RTBF**
|
||||
- **RTDocumentry**
|
||||
- **RTDocumentryPlaylist**
|
||||
- **rte**: Raidió Teilifís Éireann TV
|
||||
- **rte:radio**: Raidió Teilifís Éireann radio
|
||||
- **rtl.nl**: rtl.nl and rtlxl.nl
|
||||
- **rtl2**
|
||||
- **rtl2:you**
|
||||
- **rtl2:you:series**
|
||||
- **RTNews**
|
||||
- **RTP**
|
||||
- **RTRFM**
|
||||
- **RTS**: RTS.ch
|
||||
@@ -967,8 +1038,10 @@
|
||||
- **RTVNH**
|
||||
- **RTVS**
|
||||
- **RUHD**
|
||||
- **Rule34Video**
|
||||
- **RumbleChannel**
|
||||
- **RumbleEmbed**
|
||||
- **Ruptly**
|
||||
- **rutube**: Rutube videos
|
||||
- **rutube:channel**: Rutube channel
|
||||
- **rutube:embed**: Rutube embedded videos
|
||||
@@ -979,6 +1052,7 @@
|
||||
- **RUTV**: RUTV.RU
|
||||
- **Ruutu**
|
||||
- **Ruv**
|
||||
- **ruv.is:spila**
|
||||
- **safari**: safaribooksonline.com online video
|
||||
- **safari:api**
|
||||
- **safari:course**: safaribooksonline.com online courses
|
||||
@@ -1109,12 +1183,16 @@
|
||||
- **TeamTreeHouse**
|
||||
- **TechTalks**
|
||||
- **techtv.mit.edu**
|
||||
- **ted**
|
||||
- **TedEmbed**
|
||||
- **TedPlaylist**
|
||||
- **TedSeries**
|
||||
- **TedTalk**
|
||||
- **Tele13**
|
||||
- **Tele5**
|
||||
- **TeleBruxelles**
|
||||
- **Telecinco**: telecinco.es, cuatro.com and mediaset.es
|
||||
- **Telegraaf**
|
||||
- **telegram:embed**
|
||||
- **TeleMB**
|
||||
- **Telemundo**
|
||||
- **TeleQuebec**
|
||||
@@ -1131,7 +1209,6 @@
|
||||
- **TheIntercept**
|
||||
- **ThePlatform**
|
||||
- **ThePlatformFeed**
|
||||
- **TheScene**
|
||||
- **TheStar**
|
||||
- **TheSun**
|
||||
- **ThetaStream**
|
||||
@@ -1148,6 +1225,7 @@
|
||||
- **tiktok:tag**
|
||||
- **tiktok:user**
|
||||
- **tinypic**: tinypic.com videos
|
||||
- **TLC**
|
||||
- **TMZ**
|
||||
- **TNAFlix**
|
||||
- **TNAFlixNetworkEmbed**
|
||||
@@ -1160,6 +1238,7 @@
|
||||
- **Toypics**: Toypics video
|
||||
- **ToypicsUser**: Toypics user profile
|
||||
- **TrailerAddict** (Currently broken)
|
||||
- **TravelChannel**
|
||||
- **Trilulilu**
|
||||
- **Trovo**
|
||||
- **TrovoChannelClip**: All Clips of a trovo.live channel; "trovoclip:" prefix
|
||||
@@ -1207,6 +1286,8 @@
|
||||
- **TVNowNew**
|
||||
- **TVNowSeason**
|
||||
- **TVNowShow**
|
||||
- **tvopengr:embed**: tvopen.gr embedded videos
|
||||
- **tvopengr:watch**: tvopen.gr (and ethnos.gr) videos
|
||||
- **tvp**: Telewizja Polska
|
||||
- **tvp:embed**: Telewizja Polska
|
||||
- **tvp:series**
|
||||
@@ -1270,9 +1351,11 @@
|
||||
- **Viddler**
|
||||
- **Videa**
|
||||
- **video.arnes.si**: Arnes Video
|
||||
-- **video.google:search**: Google Video search; "gvsearch:" prefix (Currently broken)
+- **video.google:search**: Google Video search; "gvsearch:" prefix
|
||||
- **video.sky.it**
|
||||
- **video.sky.it:live**
|
||||
- **VideocampusSachsen**
|
||||
- **VideocampusSachsenEmbed**
|
||||
- **VideoDetective**
|
||||
- **videofy.me**
|
||||
- **videomore**
|
||||
@@ -1299,6 +1382,8 @@
|
||||
- **vimeo:review**: Review pages on vimeo
|
||||
- **vimeo:user**
|
||||
- **vimeo:watchlater**: Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)
|
||||
- **Vimm:recording**
|
||||
- **Vimm:stream**
|
||||
- **Vimple**: Vimple - one-click video hosting
|
||||
- **Vine**
|
||||
- **vine:user**
|
||||
@@ -1313,6 +1398,7 @@
|
||||
- **vlive**
|
||||
- **vlive:channel**
|
||||
- **vlive:post**
|
||||
- **vm.tiktok**
|
||||
- **Vodlocker**
|
||||
- **VODPl**
|
||||
- **VODPlatform**
|
||||
@@ -1332,7 +1418,6 @@
|
||||
- **VShare**
|
||||
- **VTM**
|
||||
- **VTXTV**
|
||||
- **vube**: Vube.com
|
||||
- **VuClip**
|
||||
- **Vupload**
|
||||
- **VVVVID**
|
||||
@@ -1348,10 +1433,10 @@
|
||||
- **WatchBox**
|
||||
- **WatchIndianPorn**: Watch Indian Porn
|
||||
- **WDR**
|
||||
-- **wdr:mobile**
+- **wdr:mobile** (Currently broken)
|
||||
- **WDRElefant**
|
||||
- **WDRPage**
|
||||
-- **web.archive:youtube**: web.archive.org saved youtube videos
+- **web.archive:youtube**: web.archive.org saved youtube videos, "ytarchive:" prefix
|
||||
- **Webcaster**
|
||||
- **WebcasterFeed**
|
||||
- **WebOfStories**
|
||||
@@ -1383,6 +1468,7 @@
|
||||
- **xiami:song**: 虾米音乐
|
||||
- **ximalaya**: 喜马拉雅FM
|
||||
- **ximalaya:album**: 喜马拉雅FM 专辑
|
||||
- **xinpianchang**: xinpianchang.com
|
||||
- **XMinus**
|
||||
- **XNXX**
|
||||
- **Xstream**
|
||||
@@ -1402,6 +1488,7 @@
|
||||
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
|
||||
- **yandexmusic:track**: Яндекс.Музыка - Трек
|
||||
- **YandexVideo**
|
||||
- **YandexVideoPreview**
|
||||
- **YapFiles**
|
||||
- **YesJapan**
|
||||
- **yinyuetai:video**: 音悦Tai
|
||||
@@ -1418,6 +1505,7 @@
|
||||
- **youtube**: YouTube
|
||||
- **youtube:favorites**: YouTube liked videos; ":ytfav" keyword (requires cookies)
|
||||
- **youtube:history**: Youtube watch history; ":ythis" keyword (requires cookies)
|
||||
- **youtube:music:search_url**: YouTube music search URLs with selectable sections (Eg: #songs)
|
||||
- **youtube:playlist**: YouTube playlists
|
||||
- **youtube:recommended**: YouTube recommended videos; ":ytrec" keyword
|
||||
- **youtube:search**: YouTube search; "ytsearch:" prefix
|
||||
@@ -1425,9 +1513,10 @@
|
||||
- **youtube:search_url**: YouTube search URLs with sorting and filter support
|
||||
- **youtube:subscriptions**: YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)
|
||||
- **youtube:tab**: YouTube Tabs
|
||||
- **youtube:user**: YouTube user videos; "ytuser:" prefix
|
||||
- **youtube:watchlater**: Youtube watch later list; ":ytwatchlater" keyword (requires cookies)
|
||||
- **YoutubeLivestreamEmbed**: YouTube livestream embeds
|
||||
- **YoutubeYtBe**: youtu.be
|
||||
- **YoutubeYtUser**: YouTube user videos; "ytuser:" prefix
|
||||
- **Zapiks**
|
||||
- **Zattoo**
|
||||
- **ZattooLive**
|
||||
@@ -1438,7 +1527,7 @@
|
||||
- **ZenYandex**
|
||||
- **ZenYandexChannel**
|
||||
- **Zhihu**
|
||||
-- **zingmp3**: mp3.zing.vn
+- **zingmp3**: zingmp3.vn
|
||||
- **zingmp3:album**
|
||||
- **zoom**
|
||||
- **Zype**
|
||||
|
||||
@@ -220,7 +220,7 @@ def sanitize_got_info_dict(got_dict):
     IGNORED_PREFIXES = ('', 'playlist', 'requested', 'webpage')
 
     def sanitize(key, value):
-        if isinstance(value, str) and len(value) > 100:
+        if isinstance(value, str) and len(value) > 100 and key != 'thumbnail':
             return f'md5:{md5(value)}'
         elif isinstance(value, list) and len(value) > 10:
             return f'count:{len(value)}'
 
@@ -30,9 +30,7 @@ class YDL(FakeYDL):
         self.msgs = []
 
     def process_info(self, info_dict):
-        info_dict = info_dict.copy()
-        info_dict.pop('__original_infodict', None)
-        self.downloaded_info_dicts.append(info_dict)
+        self.downloaded_info_dicts.append(info_dict.copy())
 
     def to_screen(self, msg):
         self.msgs.append(msg)
@@ -898,20 +896,6 @@ class TestYoutubeDL(unittest.TestCase):
         os.unlink(filename)
 
     def test_match_filter(self):
-        class FilterYDL(YDL):
-            def __init__(self, *args, **kwargs):
-                super(FilterYDL, self).__init__(*args, **kwargs)
-                self.params['simulate'] = True
-
-            def process_info(self, info_dict):
-                super(YDL, self).process_info(info_dict)
-
-            def _match_entry(self, info_dict, incomplete=False):
-                res = super(FilterYDL, self)._match_entry(info_dict, incomplete)
-                if res is None:
-                    self.downloaded_info_dicts.append(info_dict.copy())
-                return res
-
         first = {
             'id': '1',
             'url': TEST_URL,
@@ -939,7 +923,7 @@ class TestYoutubeDL(unittest.TestCase):
         videos = [first, second]
 
         def get_videos(filter_=None):
-            ydl = FilterYDL({'match_filter': filter_})
+            ydl = YDL({'match_filter': filter_, 'simulate': True})
             for v in videos:
                 ydl.process_ie_result(v, download=True)
             return [v['id'] for v in ydl.downloaded_info_dicts]
 
@@ -1133,7 +1133,7 @@ class TestUtil(unittest.TestCase):
 
     def test_clean_html(self):
         self.assertEqual(clean_html('a:\nb'), 'a: b')
-        self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
+        self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
+        self.assertEqual(clean_html('a<br>\xa0b'), 'a\nb')
 
     def test_intlist_to_bytes(self):
 
@@ -9,11 +9,9 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 from test.helper import FakeYDL, is_download_test
 
-
 from yt_dlp.extractor import (
-    YoutubePlaylistIE,
-    YoutubeTabIE,
     YoutubeIE,
+    YoutubeTabIE,
 )
 
@@ -27,21 +25,10 @@ class TestYoutubeLists(unittest.TestCase):
         dl = FakeYDL()
         dl.params['noplaylist'] = True
         ie = YoutubeTabIE(dl)
-        result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
+        result = ie.extract('https://www.youtube.com/watch?v=OmJ-4B-mS-Y&list=PLydZ2Hrp_gPRJViZjLFKaBMgCQOYEEkyp&index=2')
         self.assertEqual(result['_type'], 'url')
-        self.assertEqual(YoutubeIE.extract_id(result['url']), 'FXxLjLQi3Fg')
-
-    def test_youtube_course(self):
-        print('Skipping: Course URLs no longer exists')
-        return
-        dl = FakeYDL()
-        ie = YoutubePlaylistIE(dl)
-        # TODO find a > 100 (paginating?) videos course
-        result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
-        entries = list(result['entries'])
-        self.assertEqual(YoutubeIE.extract_id(entries[0]['url']), 'j9WZyLZCBzs')
-        self.assertEqual(len(entries), 25)
-        self.assertEqual(YoutubeIE.extract_id(entries[-1]['url']), 'rYefUsYuEp0')
+        self.assertEqual(result['ie_key'], YoutubeIE.ie_key())
+        self.assertEqual(YoutubeIE.extract_id(result['url']), 'OmJ-4B-mS-Y')
 
     def test_youtube_mix(self):
         dl = FakeYDL()
@@ -52,15 +39,6 @@ class TestYoutubeLists(unittest.TestCase):
         original_video = entries[0]
         self.assertEqual(original_video['id'], 'tyITL_exICo')
 
-    def test_youtube_toptracks(self):
-        print('Skipping: The playlist page gives error 500')
-        return
-        dl = FakeYDL()
-        ie = YoutubePlaylistIE(dl)
-        result = ie.extract('https://www.youtube.com/playlist?list=MCUS')
-        entries = result['entries']
-        self.assertEqual(len(entries), 100)
-
     def test_youtube_flat_playlist_extraction(self):
         dl = FakeYDL()
         dl.params['extract_flat'] = True
 
@@ -86,6 +86,14 @@ _NSIG_TESTS = [
         'https://www.youtube.com/s/player/8040e515/player_ias.vflset/en_US/base.js',
         'wvOFaY-yjgDuIEg5', 'HkfBFDHmgw4rsw',
     ),
+    (
+        'https://www.youtube.com/s/player/e06dea74/player_ias.vflset/en_US/base.js',
+        'AiuodmaDDYw8d3y4bf', 'ankd8eza2T6Qmw',
+    ),
+    (
+        'https://www.youtube.com/s/player/5dd88d1d/player-plasma-ias-phone-en_US.vflset/base.js',
+        'kSxKFLeqzv_ZyHSAt', 'n8gS8oRlHOxPFA',
+    ),
 ]
 
 
@@ -116,10 +124,17 @@ class TestPlayerInfo(unittest.TestCase):
 class TestSignature(unittest.TestCase):
     def setUp(self):
         TEST_DIR = os.path.dirname(os.path.abspath(__file__))
-        self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata')
+        self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata/sigs')
         if not os.path.exists(self.TESTDATA_DIR):
             os.mkdir(self.TESTDATA_DIR)
 
+    def tearDown(self):
+        try:
+            for f in os.listdir(self.TESTDATA_DIR):
+                os.remove(f)
+        except OSError:
+            pass
+
 
 def t_factory(name, sig_func, url_pattern):
     def make_tfunc(url, sig_input, expected_sig):
@@ -32,6 +32,7 @@ from string import ascii_letters
 
 from .compat import (
     compat_basestring,
+    compat_brotli,
     compat_get_terminal_size,
     compat_kwargs,
     compat_numeric_types,
@@ -72,6 +73,7 @@ from .utils import (
     GeoRestrictedError,
     get_domain,
     HEADRequest,
+    InAdvancePagedList,
     int_or_none,
     iri_to_uri,
     ISO3166Utils,
@@ -82,6 +84,7 @@ from .utils import (
     make_dir,
     make_HTTPS_handler,
     MaxDownloadsReached,
+    merge_headers,
     network_exceptions,
     number_of_digits,
     orderedSet,
@@ -200,9 +203,12 @@ class YoutubeDL(object):
     verbose:           Print additional info to stdout.
     quiet:             Do not print messages to stdout.
     no_warnings:       Do not print out anything for warnings.
-    forceprint:        A dict with keys video/playlist mapped to
-                       a list of templates to force print to stdout
+    forceprint:        A dict with keys WHEN mapped to a list of templates to
+                       print to stdout. The allowed keys are video or any of the
+                       items in utils.POSTPROCESS_WHEN.
+                       For compatibility, a single list is also accepted
+    print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
+                       a list of tuples with (template, filename)
     forceurl:          Force printing final URL. (Deprecated)
     forcetitle:        Force printing title. (Deprecated)
     forceid:           Force printing ID. (Deprecated)
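
For instance, the new shape of these options might be used like this when embedding (an illustrative sketch):

```
ydl_opts = {
    # Print the title once each video has been fully processed
    'forceprint': {'video': ['%(title)s']},
    # Also append "title - url" lines to a file after each download is moved
    'print_to_file': {'after_move': [('%(title)s - %(webpage_url)s', 'downloaded.txt')]},
}
```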
@@ -229,6 +235,8 @@
                        See "Sorting Formats" for more details.
     format_sort_force: Force the given format_sort. See "Sorting Formats"
                        for more details.
+    prefer_free_formats: Whether to prefer video formats with free containers
+                       over non-free ones of same quality.
     allow_multiple_video_streams:   Allow multiple video streams to be merged
                        into a single file
     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
@@ -328,6 +336,7 @@
     nocheckcertificate:  Do not verify SSL certificates
     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                        At the moment, this is only supported by YouTube.
+    http_headers:      A dictionary of custom headers to be used for all requests
     proxy:             URL of the proxy server to use
     geo_verification_proxy:  URL of the proxy to use for IP address verification
                        on geo-restricted sites.
@@ -348,8 +357,8 @@
     postprocessors:    A list of dictionaries, each with an entry
                        * key:  The name of the postprocessor. See
                                yt_dlp/postprocessor/__init__.py for a list.
-                       * when: When to run the postprocessor. Can be one of
-                               pre_process|before_dl|post_process|after_move.
+                       * when: When to run the postprocessor. Allowed values are
+                               the entries of utils.POSTPROCESS_WHEN
                                Assumed to be 'post_process' if not given
     post_hooks:        Deprecated - Register a custom postprocessor instead
                        A list of functions that get called as the final step
@@ -480,6 +489,7 @@
     extractor_args:    A dictionary of arguments to be passed to the extractors.
                        See "EXTRACTOR ARGUMENTS" for details.
                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
+    mark_watched:      Mark videos watched (even with --simulate). Only for YouTube
     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
                        If True (default), DASH manifests and related
                        data will be downloaded and processed by extractor.
@@ -591,12 +601,14 @@ class YoutubeDL(object):
|
||||
else:
|
||||
self.params['nooverwrites'] = not self.params['overwrites']
|
||||
|
||||
# Compatibility with older syntax
|
||||
params.setdefault('forceprint', {})
|
||||
if not isinstance(params['forceprint'], dict):
|
||||
params['forceprint'] = {'video': params['forceprint']}
|
||||
self.params.setdefault('forceprint', {})
|
||||
self.params.setdefault('print_to_file', {})
|
||||
|
||||
if params.get('bidi_workaround', False):
|
||||
# Compatibility with older syntax
|
||||
if not isinstance(params['forceprint'], dict):
|
||||
self.params['forceprint'] = {'video': params['forceprint']}
|
||||
|
||||
if self.params.get('bidi_workaround', False):
|
||||
try:
|
||||
import pty
|
||||
master, slave = pty.openpty()
|
||||
@@ -624,7 +636,7 @@ class YoutubeDL(object):
|
||||
|
||||
if (sys.platform != 'win32'
|
||||
and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
|
||||
and not params.get('restrictfilenames', False)):
|
||||
and not self.params.get('restrictfilenames', False)):
|
||||
# Unicode filesystem API will throw errors (#1474, #13027)
|
||||
self.report_warning(
|
||||
'Assuming --restrict-filenames since file system encoding '
|
||||
@@ -640,6 +652,9 @@ class YoutubeDL(object):
|
||||
else self.params['format'] if callable(self.params['format'])
|
||||
else self.build_format_selector(self.params['format']))
|
||||
|
||||
# Set http_headers defaults according to std_headers
|
||||
self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
|
||||
|
||||
self._setup_opener()
|
||||
|
||||
if auto_init:
|
||||
@@ -881,7 +896,8 @@ class YoutubeDL(object):
|
||||
def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
|
||||
if test_encoding:
|
||||
original_text = text
|
||||
encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
|
||||
# handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
|
||||
encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
|
||||
text = text.encode(encoding, 'ignore').decode(encoding)
|
||||
if fallback is not None and text != original_text:
|
||||
text = fallback
|
||||
@@ -946,13 +962,13 @@ class YoutubeDL(object):
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False):
    def raise_no_formats(self, info, forced=False, *, msg=None):
        has_drm = info.get('__has_drm')
        msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
        expected = self.params.get('ignore_no_formats_error')
        if forced or not expected:
        ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
        msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
        if forced or not ignored:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or expected)
                                 expected=has_drm or ignored or expected)
        else:
            self.report_warning(msg)
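The rewritten msg line leans on and/or short-circuiting instead of a conditional expression; a plain-Python sketch of the grouping:

    # 'a or b and c or d' parses as 'a or ((b and c) or d)'
    for msg, has_drm in ((None, False), (None, True), ('custom', True)):
        result = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
        # (None, False)    -> 'No video formats found!'
        # (None, True)     -> 'This video is DRM protected'
        # ('custom', True) -> 'custom'  (an explicit msg always wins)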
@@ -1029,8 +1045,7 @@ class YoutubeDL(object):
    @staticmethod
    def _copy_infodict(info_dict):
        info_dict = dict(info_dict)
        for key in ('__original_infodict', '__postprocessors'):
            info_dict.pop(key, None)
        info_dict.pop('__postprocessors', None)
        return info_dict

    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
@@ -1215,10 +1230,17 @@ class YoutubeDL(object):
        try:
            outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
            filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
            if not filename:
                return None

            force_ext = OUTTMPL_TYPES.get(tmpl_type)
            if filename and force_ext is not None:
                filename = replace_extension(filename, force_ext, info_dict.get('ext'))
            if tmpl_type in ('default', 'temp'):
                final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
                if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
                    filename = replace_extension(filename, ext, final_ext)
            else:
                force_ext = OUTTMPL_TYPES[tmpl_type]
                if force_ext:
                    filename = replace_extension(filename, force_ext, info_dict.get('ext'))

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        trim_file_name = self.params.get('trim_file_name', False)
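For 'default'/'temp' templates the new branch undoes a --merge-output-format/--remux-video extension on intermediate files. Assuming replace_extension(filename, ext, expected_real_ext) swaps a trailing .expected_real_ext for .ext (inferred from the calls above, not confirmed by the diff):

    # final_ext='mp4' but the currently downloading format is webm:
    # replace_extension('video.mp4', 'webm', 'mp4') -> 'video.webm'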
@@ -1457,8 +1479,12 @@ class YoutubeDL(object):
        self.add_extra_info(ie_result, {
            'webpage_url': url,
            'original_url': url,
            'webpage_url_basename': url_basename(url),
            'webpage_url_domain': get_domain(url),
        })
        webpage_url = ie_result.get('webpage_url')
        if webpage_url:
            self.add_extra_info(ie_result, {
                'webpage_url_basename': url_basename(webpage_url),
                'webpage_url_domain': get_domain(webpage_url),
            })
        if ie is not None:
            self.add_extra_info(ie_result, {

@@ -1566,6 +1592,7 @@ class YoutubeDL(object):

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        self._fill_common_fields(ie_result, False)
        self._sanitize_thumbnails(ie_result)
        try:
            return self.__process_playlist(ie_result, download)

@@ -1662,6 +1689,9 @@ class YoutubeDL(object):
            msg = 'Downloading %d videos'
        if not isinstance(ie_entries, (PagedList, LazyList)):
            ie_entries = LazyList(ie_entries)
        elif isinstance(ie_entries, InAdvancePagedList):
            if ie_entries._pagesize == 1:
                playlist_count = ie_entries._pagecount

        def get_entry(i):
            return YoutubeDL.__handle_extraction_exceptions(
@@ -1825,15 +1855,21 @@ class YoutubeDL(object):
            '^=': lambda attr, value: attr.startswith(value),
            '$=': lambda attr, value: attr.endswith(value),
            '*=': lambda attr, value: value in attr,
            '~=': lambda attr, value: value.search(attr) is not None
        }
        str_operator_rex = re.compile(r'''(?x)\s*
            (?P<key>[a-zA-Z0-9._-]+)\s*
            (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[a-zA-Z0-9._-]+)\s*
            (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
            (?P<quote>["'])?
            (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
            (?(quote)(?P=quote))\s*
            ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
        m = str_operator_rex.fullmatch(filter_spec)
        if m:
            comparison_value = m.group('value')
            if m.group('op') == '~=':
                comparison_value = re.compile(m.group('value'))
            else:
                comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
            str_op = STR_OPERATORS[m.group('op')]
            if m.group('negation'):
                op = lambda attr, value: not str_op(attr, value)
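A runnable sketch of what the reworked regex accepts; the operator list here is an assumed subset of STR_OPERATORS (only the keys visible above plus plain '='), while the pattern itself is copied from the hunk:

    import re

    OPS = ['*=', '^=', '$=', '~=', '=']  # assumption: subset of STR_OPERATORS
    rex = re.compile(r'''(?x)\s*
        (?P<key>[a-zA-Z0-9._-]+)\s*
        (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
        (?P<quote>["'])?
        (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
        (?(quote)(?P=quote))\s*
        ''' % '|'.join(map(re.escape, OPS)))

    assert rex.fullmatch("uploader~='kevin .+'")  # quoted values may now contain spaces
    assert rex.fullmatch('format_id^=hls')        # bare values still work as before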
@@ -2222,11 +2258,7 @@ class YoutubeDL(object):
        return _build_selector_function(parsed_selector)

    def _calc_headers(self, info_dict):
        res = std_headers.copy()

        add_headers = info_dict.get('http_headers')
        if add_headers:
            res.update(add_headers)
        res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})

        cookies = self._calc_cookies(info_dict)
        if cookies:
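merge_headers (added to yt_dlp.utils in this range) is used here and in YoutubeDL.__init__ above; a sketch of its assumed behaviour - keys compared case-insensitively, later dicts winning:

    def merge_headers(*dicts):
        # assumed implementation: normalise keys with str.title() so that
        # 'user-agent' and 'User-Agent' collide, later dicts taking priority
        return {k.title(): v for d in dicts for k, v in d.items()}

    assert merge_headers({'user-agent': 'A', 'Accept': '*/*'}, {'User-Agent': 'B'}) \
        == {'User-Agent': 'B', 'Accept': '*/*'}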
@@ -2284,55 +2316,17 @@ class YoutubeDL(object):
        else:
            info_dict['thumbnails'] = thumbnails

    def process_video_result(self, info_dict, download=True):
        assert info_dict.get('_type', 'video') == 'video'
        self._num_videos += 1

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result',
                                 video_id=info_dict['id'], ie=info_dict['extractor'])

        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']
    def _fill_common_fields(self, info_dict, is_video=True):
        # TODO: move sanitization here
        if is_video:
            # playlists are allowed to lack "title"
            info_dict['fulltitle'] = info_dict.get('title')
            if 'title' not in info_dict:
                raise ExtractorError('Missing "title" field in extractor result',
                                     video_id=info_dict['id'], ie=info_dict['extractor'])
            elif not info_dict.get('title'):
                self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
                info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'

        if info_dict.get('duration') is not None:
            info_dict['duration_string'] = formatSeconds(info_dict['duration'])
@@ -2374,6 +2368,59 @@ class YoutubeDL(object):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

    def process_video_result(self, info_dict, download=True):
        assert info_dict.get('_type', 'video') == 'video'
        self._num_videos += 1

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
        elif not info_dict.get('id'):
            raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])

        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)
        if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
            self.report_warning('"duration" field is negative, there is an error in extractor')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        self._fill_common_fields(info_dict)

        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
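The new duration guard above is terse; a plain-Python sketch of what happens for each value (dict.get/pop semantics only, no yt-dlp code):

    for d in (None, 0, -5, 5):
        info = {'duration': d}
        warn = (info.get('duration') or 0) <= 0 and bool(info.pop('duration', None))
        # d=None -> key popped, no warning; d=0 -> key popped, no warning
        # d=-5   -> key popped, warning;    d=5 -> short-circuits, key kept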
@@ -2400,14 +2447,14 @@ class YoutubeDL(object):
        if not self.params.get('allow_unplayable_formats'):
            formats = [f for f in formats if not f.get('has_drm')]

        # backward compatibility
        info_dict['fulltitle'] = info_dict['title']

        if info_dict.get('is_live'):
            get_from_start = bool(self.params.get('live_from_start'))
        get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
        if not get_from_start:
            info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
        if info_dict.get('is_live') and formats:
            formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
            if not get_from_start:
                info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
            if get_from_start and not formats:
                self.raise_no_formats(info_dict, msg='--live-from-start is passed, but there are no formats that can be downloaded from the start. '
                                                     'If you want to download from the current time, pass --no-live-from-start')

        if not formats:
            self.raise_no_formats(info_dict)

@@ -2483,8 +2530,6 @@ class YoutubeDL(object):
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if self.params.get('check_formats') is True:
            formats = LazyList(self._check_formats(formats[::-1]), reverse=True)

@@ -2497,6 +2542,12 @@ class YoutubeDL(object):

        info_dict, _ = self.pre_process(info_dict)

        if self._match_entry(info_dict) is not None:
            return info_dict

        self.post_extract(info_dict)
        info_dict, _ = self.pre_process(info_dict, 'after_filter')

        # The pre-processors may have modified the formats
        formats = info_dict.get('formats', [info_dict])
@@ -2581,15 +2632,12 @@ class YoutubeDL(object):
            + ', '.join([f['format_id'] for f in formats_to_download]))
        max_downloads_reached = False
        for i, fmt in enumerate(formats_to_download):
            formats_to_download[i] = new_info = dict(info_dict)
            # Save a reference to the original info_dict so that it can be modified in process_info if needed
            formats_to_download[i] = new_info = self._copy_infodict(info_dict)
            new_info.update(fmt)
            new_info['__original_infodict'] = info_dict
            try:
                self.process_info(new_info)
            except MaxDownloadsReached:
                max_downloads_reached = True
            new_info.pop('__original_infodict')
            # Remove copied info
            for key, val in tuple(new_info.items()):
                if info_dict.get(key) == val:
@@ -2634,12 +2682,15 @@ class YoutubeDL(object):
        # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
        requested_langs = []
        for lang_re in self.params.get('subtitleslangs'):
            if lang_re == 'all':
                requested_langs.extend(all_sub_langs)
                continue
            discard = lang_re[0] == '-'
            if discard:
                lang_re = lang_re[1:]
            if lang_re == 'all':
                if discard:
                    requested_langs = []
                else:
                    requested_langs.extend(all_sub_langs)
                continue
            current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
            if discard:
                for lang in current_langs:
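A condensed, self-contained sketch of the selection loop (not the exact code) showing why the reordering matters: '-live_chat' after 'all' now subtracts instead of being treated as a literal language name:

    import re

    all_sub_langs = ['en', 'en-US', 'fr', 'live_chat']
    requested = []
    for lang_re in ['all', '-live_chat']:
        discard = lang_re[0] == '-'
        if discard:
            lang_re = lang_re[1:]
        if lang_re == 'all':
            requested = [] if discard else requested + all_sub_langs
            continue
        matches = list(filter(re.compile(lang_re + '$').match, all_sub_langs))
        requested = [l for l in requested if l not in matches] if discard else requested + matches

    assert requested == ['en', 'en-US', 'fr']  # everything except live_chat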
@@ -2679,19 +2730,33 @@ class YoutubeDL(object):
                subs[lang] = f
        return subs

    def _forceprint(self, tmpl, info_dict):
        mobj = re.match(r'\w+(=?)$', tmpl)
        if mobj and mobj.group(1):
            tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})r'
        elif mobj:
            tmpl = '%({})s'.format(tmpl)
    def _forceprint(self, key, info_dict):
        if info_dict is None:
            return
        info_copy = info_dict.copy()
        info_copy['formats_table'] = self.render_formats_table(info_dict)
        info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
        info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
        info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))

        info_dict = info_dict.copy()
        info_dict['formats_table'] = self.render_formats_table(info_dict)
        info_dict['thumbnails_table'] = self.render_thumbnails_table(info_dict)
        info_dict['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
        info_dict['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
        self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))

        def format_tmpl(tmpl):
            mobj = re.match(r'\w+(=?)$', tmpl)
            if mobj and mobj.group(1):
                return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
            elif mobj:
                return f'%({tmpl})s'
            return tmpl

        for tmpl in self.params['forceprint'].get(key, []):
            self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))

        for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
            filename = self.evaluate_outtmpl(file_tmpl, info_dict)
            tmpl = format_tmpl(tmpl)
            self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
            if self._ensure_dir_exists(filename):
                with io.open(filename, 'a', encoding='utf-8') as f:
                    f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')

    def __forced_printings(self, info_dict, filename, incomplete):
        def print_mandatory(field, actual_field=None):
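format_tmpl (copied from the hunk above) expands the shorthand accepted by --print; a standalone check:

    import re

    def format_tmpl(tmpl):
        mobj = re.match(r'\w+(=?)$', tmpl)
        if mobj and mobj.group(1):
            return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
        elif mobj:
            return f'%({tmpl})s'
        return tmpl

    assert format_tmpl('title') == '%(title)s'
    assert format_tmpl('title=') == 'title = %(title)r'
    assert format_tmpl('%(id)s.%(ext)s') == '%(id)s.%(ext)s'  # full templates pass through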
@@ -2715,10 +2780,11 @@ class YoutubeDL(object):
        elif 'url' in info_dict:
            info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')

        if self.params['forceprint'].get('video') or self.params.get('forcejson'):
        if (self.params.get('forcejson')
                or self.params['forceprint'].get('video')
                or self.params['print_to_file'].get('video')):
            self.post_extract(info_dict)
        for tmpl in self.params['forceprint'].get('video', []):
            self._forceprint(tmpl, info_dict)
        self._forceprint('video', info_dict)

        print_mandatory('title')
        print_mandatory('id')

@@ -2779,7 +2845,7 @@ class YoutubeDL(object):
        return None

    def process_info(self, info_dict):
        """Process a single resolved IE result. (Modified it in-place)"""
        """Process a single resolved IE result. (Modifies it in-place)"""

        assert info_dict.get('_type', 'video') == 'video'
        original_infodict = info_dict

@@ -2787,18 +2853,22 @@ class YoutubeDL(object):
        if 'format' not in info_dict and 'ext' in info_dict:
            info_dict['format'] = info_dict['ext']

        # This is mostly just for backward compatibility of process_info
        # As a side-effect, this allows for format-specific filters
        if self._match_entry(info_dict) is not None:
            info_dict['__write_download_archive'] = 'ignore'
            return

        # Does nothing under normal operation - for backward compatibility of process_info
        self.post_extract(info_dict)
        self._num_downloads += 1

        # info_dict['_filename'] needs to be set for backward compatibility
        info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
        temp_filename = self.prepare_filename(info_dict, 'temp')
        files_to_move = {}

        self._num_downloads += 1

        # Forced printings
        self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
@@ -2861,9 +2931,11 @@ class YoutubeDL(object):

        # Write internet shortcut files
        def _write_link_file(link_type):
            if 'webpage_url' not in info_dict:
                self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
                return False
            url = try_get(info_dict['webpage_url'], iri_to_uri)
            if not url:
                self.report_warning(
                    f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
                return True
            linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
            if not self._ensure_dir_exists(encodeFilename(linkfn)):
                return False

@@ -2874,7 +2946,7 @@ class YoutubeDL(object):
            self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
            with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
                         newline='\r\n' if link_type == 'url' else '\n') as linkfile:
                template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
                template_vars = {'url': url}
                if link_type == 'desktop':
                    template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
                linkfile.write(LINK_TEMPLATES[link_type] % template_vars)

@@ -3009,9 +3081,11 @@ class YoutubeDL(object):
                            'while also allowing unplayable formats to be downloaded. '
                            'The formats won\'t be merged to prevent data corruption.')
                    elif not merger.available:
                        self.report_warning(
                            'You have requested merging of multiple formats but ffmpeg is not installed. '
                            'The formats won\'t be merged.')
                        msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
                        if not self.params.get('ignoreerrors'):
                            self.report_error(f'{msg}. Aborting due to --abort-on-error')
                            return
                        self.report_warning(f'{msg}. The formats won\'t be merged')

                    if temp_filename == '-':
                        reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
@@ -3207,17 +3281,15 @@ class YoutubeDL(object):
        if info_dict is None:
            return info_dict
        info_dict.setdefault('epoch', int(time.time()))
        remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
        keep_keys = ['_type']  # Always keep this to facilitate load-info-json
        info_dict.setdefault('_type', 'video')

        if remove_private_keys:
            remove_keys |= {
            reject = lambda k, v: v is None or (k.startswith('_') and k != '_type') or k in {
                'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
                'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
            }
            reject = lambda k, v: k not in keep_keys and (
                k.startswith('_') or k in remove_keys or v is None)
        else:
            reject = lambda k, v: k in remove_keys
            reject = lambda k, v: False

        def filter_fn(obj):
            if isinstance(obj, dict):
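A quick demonstration of the new reject predicate (lambda copied from the hunk; the sample dict is invented):

    reject = lambda k, v: v is None or (k.startswith('_') and k != '_type') or k in {
        'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
        'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
    }
    info = {'_type': 'video', '_filename': 'x.mp4', 'id': 'abc', 'filepath': 'x.mp4', 'tags': None}
    # private keys (except '_type'), None values and transient keys are dropped:
    assert {k: v for k, v in info.items() if not reject(k, v)} == {'_type': 'video', 'id': 'abc'}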
@@ -3244,14 +3316,8 @@ class YoutubeDL(object):
                actual_post_extract(video_dict or {})
            return

        post_extractor = info_dict.get('__post_extractor') or (lambda: {})
        extra = post_extractor().items()
        info_dict.update(extra)
        info_dict.pop('__post_extractor', None)

        original_infodict = info_dict.get('__original_infodict') or {}
        original_infodict.update(extra)
        original_infodict.pop('__post_extractor', None)
        post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
        info_dict.update(post_extractor())

        actual_post_extract(info_dict or {})

@@ -3285,8 +3351,7 @@ class YoutubeDL(object):
        return infodict

    def run_all_pps(self, key, info, *, additional_pps=None):
        for tmpl in self.params['forceprint'].get(key, []):
            self._forceprint(tmpl, info)
        self._forceprint(key, info)
        for pp in (additional_pps or []) + self._pps[key]:
            info = self.run_pp(pp, info)
        return info

@@ -3481,7 +3546,7 @@ class YoutubeDL(object):
            delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))

    def render_thumbnails_table(self, info_dict):
        thumbnails = list(info_dict.get('thumbnails'))
        thumbnails = list(info_dict.get('thumbnails') or [])
        if not thumbnails:
            return None
        return render_table(

@@ -3530,7 +3595,7 @@ class YoutubeDL(object):
            return

        def get_encoding(stream):
            ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
            ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
            if not supports_terminal_sequences(stream):
                from .compat import WINDOWS_VT_MODE
                ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'

@@ -3613,6 +3678,7 @@ class YoutubeDL(object):
        from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE

        lib_str = join_nonempty(
            compat_brotli and compat_brotli.__name__,
            compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
            SECRETSTORAGE_AVAILABLE and 'secretstorage',
            has_mutagen and 'mutagen',

@@ -3828,7 +3894,7 @@ class YoutubeDL(object):
            else:
                self.to_screen(f'[info] Downloading {thumb_display_id} ...')
                try:
                    uf = self.urlopen(t['url'])
                    uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
                    self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
@@ -41,6 +41,7 @@ from .utils import (
    SameFileError,
    setproctitle,
    std_headers,
    traverse_obj,
    write_string,
)
from .update import run_update
@@ -75,20 +76,15 @@ def _real_main(argv=None):
    parser, opts, args = parseOpts(argv)
    warnings, deprecation_warnings = [], []

    # Set user agent
    if opts.user_agent is not None:
        std_headers['User-Agent'] = opts.user_agent

    # Set referer
        opts.headers.setdefault('User-Agent', opts.user_agent)
    if opts.referer is not None:
        std_headers['Referer'] = opts.referer

    # Custom HTTP headers
    std_headers.update(opts.headers)
        opts.headers.setdefault('Referer', opts.referer)

    # Dump user agent
    if opts.dump_user_agent:
        write_string(std_headers['User-Agent'] + '\n', out=sys.stdout)
        ua = traverse_obj(opts.headers, 'User-Agent', casesense=False, default=std_headers['User-Agent'])
        write_string(f'{ua}\n', out=sys.stdout)
        sys.exit(0)

    # Batch file verification
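The dump path now reads the UA through traverse_obj with casesense=False (the call is in the hunk above), so a header supplied in any capitalization is found; illustrative values only:

    # traverse_obj({'user-agent': 'Foo/1.0'}, 'User-Agent', casesense=False) -> 'Foo/1.0'
    # traverse_obj({}, 'User-Agent', casesense=False, default=std_headers['User-Agent'])
    #     -> falls back to the stock User-Agent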
@@ -335,6 +331,9 @@ def _real_main(argv=None):
    if _video_multistreams_set is False and _audio_multistreams_set is False:
        _unused_compat_opt('multistreams')
    outtmpl_default = opts.outtmpl.get('default')
    if outtmpl_default == '':
        outtmpl_default, opts.skip_download = None, True
        del opts.outtmpl['default']
    if opts.useid:
        if outtmpl_default is None:
            outtmpl_default = opts.outtmpl['default'] = '%(id)s.%(ext)s'

@@ -356,6 +355,10 @@ def _real_main(argv=None):
    for type_, tmpl_list in opts.forceprint.items():
        for tmpl in tmpl_list:
            validate_outtmpl(tmpl, f'{type_} print template')
    for type_, tmpl_list in opts.print_to_file.items():
        for tmpl, file in tmpl_list:
            validate_outtmpl(tmpl, f'{type_} print-to-file template')
            validate_outtmpl(file, f'{type_} print-to-file filename')
    validate_outtmpl(opts.sponsorblock_chapter_title, 'SponsorBlock chapter title')
    for k, tmpl in opts.progress_template.items():
        k = f'{k[:-6]} console title' if '-title' in k else f'{k} progress'

@@ -467,8 +470,8 @@ def _real_main(argv=None):
            'key': 'SponsorBlock',
            'categories': sponsorblock_query,
            'api': opts.sponsorblock_api,
            # Run this immediately after extraction is complete
            'when': 'pre_process'
            # Run this after filtering videos
            'when': 'after_filter'
        })
    if opts.parse_metadata:
        postprocessors.append({

@@ -663,6 +666,7 @@ def _real_main(argv=None):
        'forcefilename': opts.getfilename,
        'forceformat': opts.getformat,
        'forceprint': opts.forceprint,
        'print_to_file': opts.print_to_file,
        'forcejson': opts.dumpjson or opts.print_json,
        'dump_single_json': opts.dump_single_json,
        'force_write_download_archive': opts.force_write_download_archive,

@@ -759,6 +763,7 @@ def _real_main(argv=None):
        'legacyserverconnect': opts.legacy_server_connect,
        'nocheckcertificate': opts.no_check_certificate,
        'prefer_insecure': opts.prefer_insecure,
        'http_headers': opts.headers,
        'proxy': opts.proxy,
        'socket_timeout': opts.socket_timeout,
        'bidi_workaround': opts.bidi_workaround,
@@ -2,8 +2,15 @@ from __future__ import unicode_literals

from math import ceil

from .compat import compat_b64decode, compat_pycrypto_AES
from .utils import bytes_to_intlist, intlist_to_bytes
from .compat import (
    compat_b64decode,
    compat_ord,
    compat_pycrypto_AES,
)
from .utils import (
    bytes_to_intlist,
    intlist_to_bytes,
)


if compat_pycrypto_AES:
@@ -25,6 +32,10 @@ else:
        return intlist_to_bytes(aes_gcm_decrypt_and_verify(*map(bytes_to_intlist, (data, key, tag, nonce))))


def unpad_pkcs7(data):
    return data[:-compat_ord(data[-1])]


BLOCK_SIZE_BYTES = 16
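unpad_pkcs7 relies on PKCS#7 padding with N bytes of value N, so stripping only needs the last byte:

    assert unpad_pkcs7(b'hello\x03\x03\x03') == b'hello'
    assert unpad_pkcs7(b'\x10' * 16) == b''  # a block of pure padding strips to empty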
@@ -506,5 +517,6 @@ __all__ = [
    'aes_encrypt',
    'aes_gcm_decrypt_and_verify',
    'aes_gcm_decrypt_and_verify_bytes',
    'key_expansion'
    'key_expansion',
    'unpad_pkcs7',
]
@@ -2,6 +2,7 @@

import asyncio
import base64
import collections
import ctypes
import getpass
import html

@@ -133,6 +134,16 @@ except AttributeError:
    asyncio.run = compat_asyncio_run


try:  # >= 3.7
    asyncio.tasks.all_tasks
except AttributeError:
    asyncio.tasks.all_tasks = asyncio.tasks.Task.all_tasks

try:
    import websockets as compat_websockets
except ImportError:
    compat_websockets = None

# Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl
# See https://github.com/yt-dlp/yt-dlp/issues/792
# https://docs.python.org/3/library/os.path.html#os.path.expanduser

@@ -159,6 +170,13 @@ except ImportError:
except ImportError:
    compat_pycrypto_AES = None

try:
    import brotlicffi as compat_brotli
except ImportError:
    try:
        import brotli as compat_brotli
    except ImportError:
        compat_brotli = None

WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None

@@ -180,14 +198,17 @@ def windows_enable_vt_mode():  # TODO: Do this the proper way https://bugs.pytho

compat_basestring = str
compat_chr = chr
compat_filter = filter
compat_input = input
compat_integer_types = (int, )
compat_kwargs = lambda kwargs: kwargs
compat_map = map
compat_numeric_types = (int, float, complex)
compat_str = str
compat_xpath = lambda xpath: xpath
compat_zip = zip

compat_collections_abc = collections.abc
compat_HTMLParser = html.parser.HTMLParser
compat_HTTPError = urllib.error.HTTPError
compat_Struct = struct.Struct

@@ -244,7 +265,9 @@ __all__ = [
    'compat_asyncio_run',
    'compat_b64decode',
    'compat_basestring',
    'compat_brotli',
    'compat_chr',
    'compat_collections_abc',
    'compat_cookiejar',
    'compat_cookiejar_Cookie',
    'compat_cookies',

@@ -254,6 +277,7 @@ __all__ = [
    'compat_etree_fromstring',
    'compat_etree_register_namespace',
    'compat_expanduser',
    'compat_filter',
    'compat_get_terminal_size',
    'compat_getenv',
    'compat_getpass',

@@ -265,6 +289,7 @@ __all__ = [
    'compat_integer_types',
    'compat_itertools_count',
    'compat_kwargs',
    'compat_map',
    'compat_numeric_types',
    'compat_ord',
    'compat_os_name',

@@ -296,6 +321,7 @@ __all__ = [
    'compat_urllib_response',
    'compat_urlparse',
    'compat_urlretrieve',
    'compat_websockets',
    'compat_xml_parse_error',
    'compat_xpath',
    'compat_zip',
@@ -11,7 +11,11 @@ from datetime import datetime, timedelta, timezone
from enum import Enum, auto
from hashlib import pbkdf2_hmac

from .aes import aes_cbc_decrypt_bytes, aes_gcm_decrypt_and_verify_bytes
from .aes import (
    aes_cbc_decrypt_bytes,
    aes_gcm_decrypt_and_verify_bytes,
    unpad_pkcs7,
)
from .compat import (
    compat_b64decode,
    compat_cookiejar_Cookie,

@@ -450,7 +454,10 @@ def _extract_safari_cookies(profile, logger):
        cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')

    if not os.path.isfile(cookies_path):
        raise FileNotFoundError('could not find safari cookies database')
        logger.debug('Trying secondary cookie location')
        cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
        if not os.path.isfile(cookies_path):
            raise FileNotFoundError('could not find safari cookies database')

    with open(cookies_path, 'rb') as f:
        cookies_data = f.read()

@@ -669,8 +676,7 @@ def _get_linux_desktop_environment(env):
            return _LinuxDesktopEnvironment.GNOME
        elif 'KDE_FULL_SESSION' in env:
            return _LinuxDesktopEnvironment.KDE
        else:
            return _LinuxDesktopEnvironment.OTHER
    return _LinuxDesktopEnvironment.OTHER


def _choose_linux_keyring(logger):

@@ -847,10 +853,9 @@ def pbkdf2_sha1(password, salt, iterations, key_length):


def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
    plaintext = aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)
    padding_length = plaintext[-1]
    plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector))
    try:
        return plaintext[:-padding_length].decode('utf-8')
        return plaintext.decode('utf-8')
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
@@ -30,6 +30,7 @@ def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=N
from .common import FileDownloader
from .dash import DashSegmentsFD
from .f4m import F4mFD
from .fc2 import FC2LiveFD
from .hls import HlsFD
from .http import HttpFD
from .rtmp import RtmpFD

@@ -58,6 +59,7 @@ PROTOCOL_MAP = {
    'ism': IsmFD,
    'mhtml': MhtmlFD,
    'niconico_dmc': NiconicoDmcFD,
    'fc2_live': FC2LiveFD,
    'websocket_frag': WebSocketFragmentFD,
    'youtube_live_chat': YoutubeLiveChatFD,
    'youtube_live_chat_replay': YoutubeLiveChatFD,

@@ -117,7 +119,7 @@ def _get_suitable_downloader(info_dict, protocol, params, default):
        return FFmpegFD
    elif (external_downloader or '').lower() == 'native':
        return HlsFD
    elif get_suitable_downloader(
    elif protocol == 'm3u8_native' and get_suitable_downloader(
            info_dict, params, None, protocol='m3u8_frag_urls', to_stdout=info_dict['to_stdout']):
        return HlsFD
    elif params.get('hls_prefer_native') is True:
@@ -210,28 +210,41 @@ class FileDownloader(object):
    def ytdl_filename(self, filename):
        return filename + '.ytdl'

    def sanitize_open(self, filename, open_mode):
        file_access_retries = self.params.get('file_access_retries', 10)
        retry = 0
        while True:
            try:
                return sanitize_open(filename, open_mode)
            except (IOError, OSError) as err:
                retry = retry + 1
                if retry > file_access_retries or err.errno not in (errno.EACCES,):
                    raise
                self.to_screen(
                    '[download] Got file access error. Retrying (attempt %d of %s) ...'
                    % (retry, self.format_retries(file_access_retries)))
                time.sleep(0.01)
    def wrap_file_access(action, *, fatal=False):
        def outer(func):
            def inner(self, *args, **kwargs):
                file_access_retries = self.params.get('file_access_retries', 0)
                retry = 0
                while True:
                    try:
                        return func(self, *args, **kwargs)
                    except (IOError, OSError) as err:
                        retry = retry + 1
                        if retry > file_access_retries or err.errno not in (errno.EACCES, errno.EINVAL):
                            if not fatal:
                                self.report_error(f'unable to {action} file: {err}')
                                return
                            raise
                        self.to_screen(
                            f'[download] Unable to {action} file due to file access error. '
                            f'Retrying (attempt {retry} of {self.format_retries(file_access_retries)}) ...')
                        time.sleep(0.01)
            return inner
        return outer

    @wrap_file_access('open', fatal=True)
    def sanitize_open(self, filename, open_mode):
        return sanitize_open(filename, open_mode)

    @wrap_file_access('remove')
    def try_remove(self, filename):
        os.remove(filename)

    @wrap_file_access('rename')
    def try_rename(self, old_filename, new_filename):
        if old_filename == new_filename:
            return
        try:
            os.replace(old_filename, new_filename)
        except (IOError, OSError) as err:
            self.report_error(f'unable to rename file: {err}')
        os.replace(old_filename, new_filename)

    def try_utime(self, filename, last_modified_hdr):
        """Try to set the last-modified time of the given file."""
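Since the retry/backoff logic now lives in the wrap_file_access decorator, adding another guarded file operation is a one-liner. A hypothetical extra method (try_truncate is not part of the diff; it only illustrates the pattern):

    @wrap_file_access('truncate')
    def try_truncate(self, filename):
        # any EACCES/EINVAL raised here is retried `file_access_retries` times
        os.truncate(filename, 0)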
@@ -17,11 +17,13 @@ from ..utils import (
    cli_valueless_option,
    cli_bool_option,
    _configuration_args,
    determine_ext,
    encodeFilename,
    encodeArgument,
    handle_youtubedl_headers,
    check_executable,
    Popen,
    remove_end,
)


@@ -157,9 +159,9 @@ class ExternalFD(FragmentFD):
                dest.write(decrypt_fragment(fragment, src.read()))
                src.close()
                if not self.params.get('keep_fragments', False):
                    os.remove(encodeFilename(fragment_filename))
                    self.try_remove(encodeFilename(fragment_filename))
            dest.close()
            os.remove(encodeFilename('%s.frag.urls' % tmpfilename))
            self.try_remove(encodeFilename('%s.frag.urls' % tmpfilename))
            return 0

@@ -251,7 +253,7 @@ class Aria2cFD(ExternalFD):
    def _make_cmd(self, tmpfilename, info_dict):
        cmd = [self.exe, '-c',
               '--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
               '--file-allocation=none', '-x16', '-j16', '-s16']
               '--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16']
        if 'fragments' in info_dict:
            cmd += ['--allow-overwrite=true', '--allow-piece-length-change=true']
        else:

@@ -463,6 +465,15 @@ class FFmpegFD(ExternalFD):
            args += ['-f', 'flv']
        elif ext == 'mp4' and tmpfilename == '-':
            args += ['-f', 'mpegts']
        elif ext == 'unknown_video':
            ext = determine_ext(remove_end(tmpfilename, '.part'))
            if ext == 'unknown_video':
                self.report_warning(
                    'The video format is unknown and cannot be downloaded by ffmpeg. '
                    'Explicitly set the extension in the filename to attempt download in that format')
            else:
                self.report_warning(f'The video format is unknown. Trying to download as {ext} according to the filename')
                args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)]
        else:
            args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)]
yt_dlp/downloader/fc2.py (new file, 41 lines)
@@ -0,0 +1,41 @@
from __future__ import division, unicode_literals

import threading

from .common import FileDownloader
from .external import FFmpegFD


class FC2LiveFD(FileDownloader):
    """
    Downloads FC2 live without being stopped. <br>
    Note, this is not a part of public API, and will be removed without notice.
    DO NOT USE
    """

    def real_download(self, filename, info_dict):
        ws = info_dict['ws']

        heartbeat_lock = threading.Lock()
        heartbeat_state = [None, 1]

        def heartbeat():
            try:
                heartbeat_state[1] += 1
                ws.send('{"name":"heartbeat","arguments":{},"id":%d}' % heartbeat_state[1])
            except Exception:
                self.to_screen('[fc2:live] Heartbeat failed')

            with heartbeat_lock:
                heartbeat_state[0] = threading.Timer(30, heartbeat)
                heartbeat_state[0]._daemonic = True
                heartbeat_state[0].start()

        heartbeat()

        new_info_dict = info_dict.copy()
        new_info_dict.update({
            'ws': None,
            'protocol': 'live_ffmpeg',
        })
        return FFmpegFD(self.ydl, self.params or {}).download(filename, new_info_dict)
@@ -14,7 +14,7 @@ except ImportError:

from .common import FileDownloader
from .http import HttpFD
from ..aes import aes_cbc_decrypt_bytes
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
from ..compat import (
    compat_os_name,
    compat_urllib_error,

@@ -25,6 +25,7 @@ from ..utils import (
    error_to_compat_str,
    encodeFilename,
    sanitized_Request,
    traverse_obj,
)


@@ -136,7 +137,12 @@ class FragmentFD(FileDownloader):
            if fragment_info_dict.get('filetime'):
                ctx['fragment_filetime'] = fragment_info_dict.get('filetime')
            ctx['fragment_filename_sanitized'] = fragment_filename
            return True, self._read_fragment(ctx)
            try:
                return True, self._read_fragment(ctx)
            except FileNotFoundError:
                if not info_dict.get('is_live'):
                    raise
                return False, None

    def _read_fragment(self, ctx):
        down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb')

@@ -153,7 +159,7 @@ class FragmentFD(FileDownloader):
        if self.__do_ytdl_file(ctx):
            self._write_ytdl_file(ctx)
        if not self.params.get('keep_fragments', False):
            os.remove(encodeFilename(ctx['fragment_filename_sanitized']))
            self.try_remove(encodeFilename(ctx['fragment_filename_sanitized']))
        del ctx['fragment_filename_sanitized']

    def _prepare_frag_download(self, ctx):

@@ -172,7 +178,7 @@ class FragmentFD(FileDownloader):
        dl = HttpQuietDownloader(
            self.ydl,
            {
                'continuedl': True,
                'continuedl': self.params.get('continuedl', True),
                'quiet': self.params.get('quiet'),
                'noprogress': True,
                'ratelimit': self.params.get('ratelimit'),

@@ -299,7 +305,7 @@ class FragmentFD(FileDownloader):
        if self.__do_ytdl_file(ctx):
            ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename']))
            if os.path.isfile(ytdl_filename):
                os.remove(ytdl_filename)
                self.try_remove(ytdl_filename)
        elapsed = time.time() - ctx['started']

        if ctx['tmpfilename'] == '-':

@@ -366,8 +372,7 @@ class FragmentFD(FileDownloader):
            # not what it decrypts to.
            if self.params.get('test', False):
                return frag_content
            decrypted_data = aes_cbc_decrypt_bytes(frag_content, decrypt_info['KEY'], iv)
            return decrypted_data[:-decrypted_data[-1]]
            return unpad_pkcs7(aes_cbc_decrypt_bytes(frag_content, decrypt_info['KEY'], iv))

        return decrypt_fragment

@@ -383,6 +388,7 @@ class FragmentFD(FileDownloader):
        max_workers = self.params.get('concurrent_fragment_downloads', 1)
        if max_progress > 1:
            self._prepare_multiline_status(max_progress)
        is_live = any(traverse_obj(args, (..., 2, 'is_live'), default=[]))

        def thread_func(idx, ctx, fragments, info_dict, tpe):
            ctx['max_progress'] = max_progress

@@ -396,25 +402,43 @@ class FragmentFD(FileDownloader):
            def __exit__(self, exc_type, exc_val, exc_tb):
                pass

        spins = []
        if compat_os_name == 'nt':
            self.report_warning('Ctrl+C does not work on Windows when used with parallel threads. '
                                'This is a known issue and patches are welcome')
            def bindoj_result(future):
                while True:
                    try:
                        return future.result(0.1)
                    except KeyboardInterrupt:
                        raise
                    except concurrent.futures.TimeoutError:
                        continue
        else:
            def bindoj_result(future):
                return future.result()

        def interrupt_trigger_iter(fg):
            for f in fg:
                if not interrupt_trigger[0]:
                    break
                yield f

        spins = []
        for idx, (ctx, fragments, info_dict) in enumerate(args):
            tpe = FTPE(math.ceil(max_workers / max_progress))
            job = tpe.submit(thread_func, idx, ctx, fragments, info_dict, tpe)
            job = tpe.submit(thread_func, idx, ctx, interrupt_trigger_iter(fragments), info_dict, tpe)
            spins.append((tpe, job))

        result = True
        for tpe, job in spins:
            try:
                result = result and job.result()
                result = result and bindoj_result(job)
            except KeyboardInterrupt:
                interrupt_trigger[0] = False
            finally:
                tpe.shutdown(wait=True)
        if not interrupt_trigger[0]:
        if not interrupt_trigger[0] and not is_live:
            raise KeyboardInterrupt()
        # we expect the user wants to stop and DO WANT the preceding postprocessors to run;
        # so returning a intermediate result here instead of KeyboardInterrupt on live
        return result

    def download_and_append_fragments(

@@ -432,10 +456,11 @@ class FragmentFD(FileDownloader):
            pack_func = lambda frag_content, _: frag_content

        def download_fragment(fragment, ctx):
            if not interrupt_trigger[0]:
                return False, fragment['frag_index']

            frag_index = ctx['fragment_index'] = fragment['frag_index']
            ctx['last_error'] = None
            if not interrupt_trigger[0]:
                return False, frag_index
            headers = info_dict.get('http_headers', {}).copy()
            byte_range = fragment.get('byte_range')
            if byte_range:

@@ -501,8 +526,6 @@ class FragmentFD(FileDownloader):
            self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome')
        with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
            for fragment, frag_content, frag_index, frag_filename in pool.map(_download_fragment, fragments):
                if not interrupt_trigger[0]:
                    break
                ctx['fragment_filename_sanitized'] = frag_filename
                ctx['fragment_index'] = frag_index
                result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx)
@@ -5,7 +5,6 @@ import os
import socket
import time
import random
import re

from .common import FileDownloader
from ..compat import (

@@ -16,6 +15,7 @@ from ..utils import (
    ContentTooShortError,
    encodeFilename,
    int_or_none,
    parse_http_range,
    sanitized_Request,
    ThrottledDownload,
    write_xattr,

@@ -59,6 +59,9 @@ class HttpFD(FileDownloader):
        ctx.chunk_size = None
        throttle_start = None

        # parse given Range
        req_start, req_end, _ = parse_http_range(headers.get('Range'))

        if self.params.get('continuedl', True):
            # Establish possible resume length
            if os.path.isfile(encodeFilename(ctx.tmpfilename)):

@@ -91,6 +94,9 @@ class HttpFD(FileDownloader):
                if not is_test and chunk_size else chunk_size)
            if ctx.resume_len > 0:
                range_start = ctx.resume_len
                if req_start is not None:
                    # offset the beginning of Range to be within request
                    range_start += req_start
                if ctx.is_resume:
                    self.report_resuming_byte(ctx.resume_len)
                ctx.open_mode = 'ab'

@@ -99,7 +105,17 @@ class HttpFD(FileDownloader):
            else:
                range_start = None
            ctx.is_resume = False
            range_end = range_start + ctx.chunk_size - 1 if ctx.chunk_size else None

            if ctx.chunk_size:
                chunk_aware_end = range_start + ctx.chunk_size - 1
                # we're not allowed to download outside Range
                range_end = chunk_aware_end if req_end is None else min(chunk_aware_end, req_end)
            elif req_end is not None:
                # there's no need for chunked downloads, so download until the end of Range
                range_end = req_end
            else:
                range_end = None

            if range_end and ctx.data_len is not None and range_end >= ctx.data_len:
                range_end = ctx.data_len - 1
            has_range = range_start is not None
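A worked example of the new clamping with invented numbers: the caller passes 'Range: bytes=500-1999', 300 bytes are already on disk, and chunk_size is 1000:

    # req_start, req_end = 500, 1999          (from parse_http_range)
    # range_start = 300 + 500 = 800           (resume offset shifted into the Range)
    # chunk_aware_end = 800 + 1000 - 1 = 1799
    # range_end = min(1799, 1999) = 1799      -> effective request: bytes=800-1799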
@@ -124,23 +140,19 @@ class HttpFD(FileDownloader):
            # https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799)
            if has_range:
                content_range = ctx.data.headers.get('Content-Range')
                if content_range:
                    content_range_m = re.search(r'bytes (\d+)-(\d+)?(?:/(\d+))?', content_range)
                content_range_start, content_range_end, content_len = parse_http_range(content_range)
                if content_range_start is not None and range_start == content_range_start:
                    # Content-Range is present and matches requested Range, resume is possible
                    if content_range_m:
                        if range_start == int(content_range_m.group(1)):
                            content_range_end = int_or_none(content_range_m.group(2))
                            content_len = int_or_none(content_range_m.group(3))
                    accept_content_len = (
                        # Non-chunked download
                        not ctx.chunk_size
                        # Chunked download and requested piece or
                        # its part is promised to be served
                        or content_range_end == range_end
                        or content_len < range_end)
                    if accept_content_len:
                        ctx.data_len = content_len
                        return
                            accept_content_len = (
                                # Non-chunked download
                                not ctx.chunk_size
                                # Chunked download and requested piece or
                                # its part is promised to be served
                                or content_range_end == range_end
                                or content_len < range_end)
                            if accept_content_len:
                                ctx.data_len = content_len
                                return
                # Content-Range is either not present or invalid. Assuming remote webserver is
                # trying to send the whole file, resume is not possible, so wiping the local file
                # and performing entire redownload
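From its uses here, parse_http_range (yt_dlp.utils) is assumed to parse both 'Range' and 'Content-Range' shaped values into a (start, end, length) tuple:

    # parse_http_range('bytes 500-999/1234') -> (500, 999, 1234)    (assumed semantics)
    # parse_http_range('bytes=500-')         -> (500, None, None)
    # parse_http_range(None)                 -> (None, None, None)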
@@ -5,9 +5,12 @@ import threading

try:
    import websockets
    has_websockets = True
except ImportError:
except (ImportError, SyntaxError):
    # websockets 3.10 on python 3.6 causes SyntaxError
    # See https://github.com/yt-dlp/yt-dlp/issues/2633
    has_websockets = False
else:
    has_websockets = True

from .common import FileDownloader
from .external import FFmpegFD

@@ -22,6 +22,9 @@ class YoutubeLiveChatFD(FragmentFD):
    def real_download(self, filename, info_dict):
        video_id = info_dict['video_id']
        self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
        if not self.params.get('skip_download'):
            self.report_warning('Live chat download runs until the livestream ends. '
                                'If you wish to download the video simultaneously, run a separate yt-dlp instance')

        fragment_retries = self.params.get('fragment_retries', 0)
        test = self.params.get('test', False)
@@ -213,7 +213,7 @@ class ABCIViewIE(InfoExtractor):
            'hdnea': token,
        })

        for sd in ('720', 'sd', 'sd-low'):
        for sd in ('1080', '720', 'sd', 'sd-low'):
            sd_url = try_get(
                stream, lambda x: x['streams']['hls'][sd], compat_str)
            if not sd_url:

@@ -300,11 +300,10 @@ class ABCIViewShowSeriesIE(InfoExtractor):
            unescapeHTML(webpage_data).encode('utf-8').decode('unicode_escape'), show_id)
        video_data = video_data['route']['pageData']['_embedded']

        if self.get_param('noplaylist') and 'highlightVideo' in video_data:
            self.to_screen('Downloading just the highlight video because of --no-playlist')
            return self.url_result(video_data['highlightVideo']['shareUrl'], ie=ABCIViewIE.ie_key())
        highlight = try_get(video_data, lambda x: x['highlightVideo']['shareUrl'])
        if not self._yes_playlist(show_id, bool(highlight), video_label='highlight video'):
            return self.url_result(highlight, ie=ABCIViewIE.ie_key())

        self.to_screen(f'Downloading playlist {show_id} - add --no-playlist to just download the highlight video')
        series = video_data['selectedSeries']
        return {
            '_type': 'playlist',
yt_dlp/extractor/abematv.py (new file, 484 lines)
@@ -0,0 +1,484 @@
|
||||
import io
|
||||
import json
|
||||
import time
|
||||
import hashlib
|
||||
import hmac
|
||||
import re
|
||||
import struct
|
||||
from base64 import urlsafe_b64encode
|
||||
from binascii import unhexlify
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_ecb_decrypt
|
||||
from ..compat import (
|
||||
compat_urllib_response,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
decode_base,
|
||||
int_or_none,
|
||||
random_uuidv4,
|
||||
request_to_url,
|
||||
time_seconds,
|
||||
update_url_query,
|
||||
traverse_obj,
|
||||
intlist_to_bytes,
|
||||
bytes_to_intlist,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
# NOTE: the network handler code here is a temporary measure until the network stack overhaul PRs (#2861/#2862) are merged


def add_opener(ydl, handler):
    ''' Add a handler for opening URLs, like _download_webpage '''
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
    assert isinstance(ydl._opener, compat_urllib_request.OpenerDirector)
    ydl._opener.add_handler(handler)


def remove_opener(ydl, handler):
    '''
    Remove handler(s) for opening URLs
    @param handler Either the handler object itself or a handler type.
           Specifying a handler type will remove all handlers for which isinstance returns True.
    '''
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
    opener = ydl._opener
    assert isinstance(ydl._opener, compat_urllib_request.OpenerDirector)
    if isinstance(handler, (type, tuple)):
        find_cp = lambda x: isinstance(x, handler)
    else:
        find_cp = lambda x: x is handler

    removed = []
    for meth in dir(handler):
        if meth in ["redirect_request", "do_open", "proxy_open"]:
            # oops, coincidental match
            continue

        i = meth.find("_")
        protocol = meth[:i]
        condition = meth[i + 1:]

        if condition.startswith("error"):
            j = condition.find("_") + i + 1
            kind = meth[j + 1:]
            try:
                kind = int(kind)
            except ValueError:
                pass
            lookup = opener.handle_error.get(protocol, {})
            opener.handle_error[protocol] = lookup
        elif condition == "open":
            kind = protocol
            lookup = opener.handle_open
        elif condition == "response":
            kind = protocol
            lookup = opener.process_response
        elif condition == "request":
            kind = protocol
            lookup = opener.process_request
        else:
            continue

        handlers = lookup.setdefault(kind, [])
        if handlers:
            handlers[:] = [x for x in handlers if not find_cp(x)]

        removed.append(x for x in handlers if find_cp(x))

    if removed:
        for x in opener.handlers:
            if find_cp(x):
                x.add_parent(None)
        opener.handlers[:] = [x for x in opener.handlers if not find_cp(x)]

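The handler juggling above leans on a CPython implementation detail: OpenerDirector discovers methods named `<scheme>_open` by introspection. A minimal, self-contained sketch of the same registration pattern, using plain urllib and no yt-dlp internals (all names here are made up for illustration):

```python
import io
import urllib.request
import urllib.response


class EchoHandler(urllib.request.BaseHandler):
    # OpenerDirector dispatches '<scheme>_open' by method name, which is
    # why AbemaLicenseHandler below has to setattr() an alias for a scheme
    # that contains a hyphen ('abematv-license').
    def echo_open(self, req):
        body = req.full_url.encode('utf-8')
        return urllib.response.addinfourl(
            io.BytesIO(body), headers={'Content-Length': len(body)},
            url=req.full_url, code=200)


opener = urllib.request.build_opener(EchoHandler())
print(opener.open('echo://hello').read())  # b'echo://hello'
```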
class AbemaLicenseHandler(compat_urllib_request.BaseHandler):
    handler_order = 499
    STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
    HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'

    def __init__(self, ie: 'AbemaTVIE'):
        # the protocol that this should really handle is 'abematv-license://'
        # abematv_license_open is just a placeholder for development purposes
        # ref. https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510
        setattr(self, 'abematv-license_open', getattr(self, 'abematv_license_open'))
        self.ie = ie

    def _get_videokey_from_ticket(self, ticket):
        to_show = self.ie._downloader.params.get('verbose', False)
        media_token = self.ie._get_media_token(to_show=to_show)

        license_response = self.ie._download_json(
            'https://license.abema.io/abematv-hls', None, note='Requesting playback license' if to_show else False,
            query={'t': media_token},
            data=json.dumps({
                'kv': 'a',
                'lt': ticket
            }).encode('utf-8'),
            headers={
                'Content-Type': 'application/json',
            })

        res = decode_base(license_response['k'], self.STRTABLE)
        encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff))

        h = hmac.new(
            unhexlify(self.HKEY),
            (license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8'),
            digestmod=hashlib.sha256)
        enckey = bytes_to_intlist(h.digest())

        return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))

    def abematv_license_open(self, url):
        url = request_to_url(url)
        ticket = compat_urllib_parse_urlparse(url).netloc
        response_data = self._get_videokey_from_ticket(ticket)
        return compat_urllib_response.addinfourl(io.BytesIO(response_data), headers={
            'Content-Length': len(response_data),
        }, url=url, code=200)

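For reference, the key derivation in `_get_videokey_from_ticket` above can be traced end to end with the standard library alone. This is a sketch with made-up sample values; the `decode_base` re-implementation mirrors what the yt-dlp utility of that name does:

```python
import hashlib
import hmac
import struct
from binascii import unhexlify

STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'


def decode_base(value, digits):
    # interpret `value` as a base-len(digits) number
    num = 0
    for char in value:
        num = num * len(digits) + digits.index(char)
    return num


# 'k' from the license response encodes a 128-bit integer, which is
# repacked into 16 big-endian bytes (sample key string, not real data)
res = decode_base('2gDpDWSCnbN1Tmcq', STRTABLE)
encrypted_videokey = struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff)

# the AES key is derived from the response 'cid' plus the device id
HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
aes_key = hmac.new(unhexlify(HKEY), b'sample-cid' + b'sample-device-id',
                   digestmod=hashlib.sha256).digest()
# aes_ecb_decrypt(encrypted_videokey, aes_key) then yields the HLS video key
```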
class AbemaTVBaseIE(InfoExtractor):
    def _extract_breadcrumb_list(self, webpage, video_id):
        for jld in re.finditer(
                r'(?is)</span></li></ul><script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
                webpage):
            jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
            if jsonld:
                if jsonld.get('@type') != 'BreadcrumbList':
                    continue
                trav = traverse_obj(jsonld, ('itemListElement', ..., 'name'))
                if trav:
                    return trav
        return []

class AbemaTVIE(AbemaTVBaseIE):
    _VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air|video/episode|channels/.+?/slots)/(?P<id>[^?/]+)'
    _NETRC_MACHINE = 'abematv'
    _TESTS = [{
        'url': 'https://abema.tv/video/episode/194-25_s2_p1',
        'info_dict': {
            'id': '194-25_s2_p1',
            'title': '第1話 「チーズケーキ」 「モーニング再び」',
            'series': '異世界食堂2',
            'series_number': 2,
            'episode': '第1話 「チーズケーキ」 「モーニング再び」',
            'episode_number': 1,
        },
        'skip': 'expired',
    }, {
        'url': 'https://abema.tv/channels/anime-live2/slots/E8tvAnMJ7a9a5d',
        'info_dict': {
            'id': 'E8tvAnMJ7a9a5d',
            'title': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】',
            'series': 'ゆるキャン△ SEASON2',
            'episode': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】',
            'series_number': 2,
            'episode_number': 1,
            'description': 'md5:9c5a3172ae763278f9303922f0ea5b17',
        },
        'skip': 'expired',
    }, {
        'url': 'https://abema.tv/video/episode/87-877_s1282_p31047',
        'info_dict': {
            'id': 'E8tvAnMJ7a9a5d',
            'title': '第5話『光射す』',
            'description': 'md5:56d4fc1b4f7769ded5f923c55bb4695d',
            'thumbnail': r're:https://hayabusa\.io/.+',
            'series': '相棒',
            'episode': '第5話『光射す』',
        },
        'skip': 'expired',
    }, {
        'url': 'https://abema.tv/now-on-air/abema-anime',
        'info_dict': {
            'id': 'abema-anime',
            # this varies
            # 'title': '女子高生の無駄づかい 全話一挙【無料ビデオ72時間】',
            'description': 'md5:55f2e61f46a17e9230802d7bcc913d5f',
            'is_live': True,
        },
        'skip': 'Not supported until yt-dlp implements native live downloader OR AbemaTV can start a local HTTP server',
    }]
    _USERTOKEN = None
    _DEVICE_ID = None
    _TIMETABLE = None
    _MEDIATOKEN = None

    _SECRETKEY = b'v+Gjs=25Aw5erR!J8ZuvRrCx*rGswhB&qdHd_SYerEWdU&a?3DzN9BRbp5KwY4hEmcj5#fykMjJ=AuWz5GSMY-d@H7DMEh3M@9n2G552Us$$k9cD=3TxwWe86!x#Zyhe'

    def _generate_aks(self, deviceid):
        deviceid = deviceid.encode('utf-8')
        # add 1 hour and then drop minute and secs
        ts_1hour = int((time_seconds(hours=9) // 3600 + 1) * 3600)
        time_struct = time.gmtime(ts_1hour)
        ts_1hour_str = str(ts_1hour).encode('utf-8')

        tmp = None

        def mix_once(nonce):
            nonlocal tmp
            h = hmac.new(self._SECRETKEY, digestmod=hashlib.sha256)
            h.update(nonce)
            tmp = h.digest()

        def mix_tmp(count):
            nonlocal tmp
            for i in range(count):
                mix_once(tmp)

        def mix_twist(nonce):
            nonlocal tmp
            mix_once(urlsafe_b64encode(tmp).rstrip(b'=') + nonce)

        mix_once(self._SECRETKEY)
        mix_tmp(time_struct.tm_mon)
        mix_twist(deviceid)
        mix_tmp(time_struct.tm_mday % 5)
        mix_twist(ts_1hour_str)
        mix_tmp(time_struct.tm_hour % 5)

        return urlsafe_b64encode(tmp).rstrip(b'=').decode('utf-8')

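The three mixers are easier to follow when unrolled. A standalone sketch of the same chaining, with a placeholder secret and assuming `time_seconds(hours=9)` is simply the epoch time shifted to JST:

```python
import hashlib
import hmac
import time
from base64 import urlsafe_b64encode

SECRET = b'not-the-real-secret'  # placeholder for _SECRETKEY above


def hmac_round(nonce):
    # every mixing step is HMAC-SHA256 keyed with the fixed secret
    return hmac.new(SECRET, nonce, hashlib.sha256).digest()


def generate_aks(device_id, now=None):
    now = time.time() if now is None else now
    # next full hour on the JST wall clock
    ts_1hour = int((int(now + 9 * 3600) // 3600 + 1) * 3600)
    t = time.gmtime(ts_1hour)

    tmp = hmac_round(SECRET)                      # mix_once(_SECRETKEY)
    for _ in range(t.tm_mon):                     # mix_tmp(tm_mon)
        tmp = hmac_round(tmp)
    tmp = hmac_round(urlsafe_b64encode(tmp).rstrip(b'=') + device_id.encode())
    for _ in range(t.tm_mday % 5):                # mix_tmp(tm_mday % 5)
        tmp = hmac_round(tmp)
    tmp = hmac_round(urlsafe_b64encode(tmp).rstrip(b'=') + str(ts_1hour).encode())
    for _ in range(t.tm_hour % 5):                # mix_tmp(tm_hour % 5)
        tmp = hmac_round(tmp)
    return urlsafe_b64encode(tmp).rstrip(b'=').decode()


print(generate_aks('0a1b2c3d-0000-0000-0000-000000000000'))
```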
    def _get_device_token(self):
        if self._USERTOKEN:
            return self._USERTOKEN

        self._DEVICE_ID = random_uuidv4()
        aks = self._generate_aks(self._DEVICE_ID)
        user_data = self._download_json(
            'https://api.abema.io/v1/users', None, note='Authorizing',
            data=json.dumps({
                'deviceId': self._DEVICE_ID,
                'applicationKeySecret': aks,
            }).encode('utf-8'),
            headers={
                'Content-Type': 'application/json',
            })
        self._USERTOKEN = user_data['token']

        # don't allow adding the handler twice, though it's guarded
        remove_opener(self._downloader, AbemaLicenseHandler)
        add_opener(self._downloader, AbemaLicenseHandler(self))

        return self._USERTOKEN

    def _get_media_token(self, invalidate=False, to_show=True):
        if not invalidate and self._MEDIATOKEN:
            return self._MEDIATOKEN

        self._MEDIATOKEN = self._download_json(
            'https://api.abema.io/v1/media/token', None, note='Fetching media token' if to_show else False,
            query={
                'osName': 'android',
                'osVersion': '6.0.1',
                'osLang': 'ja_JP',
                'osTimezone': 'Asia/Tokyo',
                'appId': 'tv.abema',
                'appVersion': '3.27.1'
            }, headers={
                'Authorization': 'bearer ' + self._get_device_token()
            })['token']

        return self._MEDIATOKEN

    def _real_initialize(self):
        self._login()

    def _login(self):
        username, password = self._get_login_info()
        # No authentication to be performed
        if not username:
            return True

        if '@' in username:  # don't strictly check whether it's an email address or not
            ep, method = 'user/email', 'email'
        else:
            ep, method = 'oneTimePassword', 'userId'

        login_response = self._download_json(
            f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
            data=json.dumps({
                method: username,
                'password': password
            }).encode('utf-8'), headers={
                'Authorization': 'bearer ' + self._get_device_token(),
                'Origin': 'https://abema.tv',
                'Referer': 'https://abema.tv/',
                'Content-Type': 'application/json',
            })

        self._USERTOKEN = login_response['token']
        self._get_media_token(True)

    def _real_extract(self, url):
        # Starting a download from the infojson produced by this extractor is undefined
        # behavior and will never be fixed; downloads must be triggered by specifying the URL directly.
        # (unless there's a way to hook before downloading by extractor)
        video_id, video_type = self._match_valid_url(url).group('id', 'type')
        headers = {
            'Authorization': 'Bearer ' + self._get_device_token(),
        }
        video_type = video_type.split('/')[-1]

        webpage = self._download_webpage(url, video_id)
        canonical_url = self._search_regex(
            r'<link\s+rel="canonical"\s*href="(.+?)"', webpage, 'canonical URL',
            default=url)
        info = self._search_json_ld(webpage, video_id, default={})

        title = self._search_regex(
            r'<span\s*class=".+?EpisodeTitleBlock__title">(.+?)</span>', webpage, 'title', default=None)
        if not title:
            jsonld = None
            for jld in re.finditer(
                    r'(?is)<span\s*class="com-m-Thumbnail__image">(?:</span>)?<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
                    webpage):
                jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
                if jsonld:
                    break
            if jsonld:
                title = jsonld.get('caption')
        if not title and video_type == 'now-on-air':
            if not self._TIMETABLE:
                # cache the timetable because it grows to 5MiB in size (!!)
                self._TIMETABLE = self._download_json(
                    'https://api.abema.io/v1/timetable/dataSet?debug=false', video_id,
                    headers=headers)
            now = time_seconds(hours=9)
            for slot in self._TIMETABLE.get('slots', []):
                if slot.get('channelId') != video_id:
                    continue
                if slot['startAt'] <= now and now < slot['endAt']:
                    title = slot['title']
                    break

        # read breadcrumb on top of page
        breadcrumb = self._extract_breadcrumb_list(webpage, video_id)
        if breadcrumb:
            # breadcrumb list translates to: (example is 1st test for this IE)
            # Home > Anime (genre) > Isekai Shokudo 2 (series name) > Episode 1 "Cheese cakes" "Morning again" (episode title)
            # hence this works
            info['series'] = breadcrumb[-2]
            info['episode'] = breadcrumb[-1]
            if not title:
                title = info['episode']

        description = self._html_search_regex(
            (r'<p\s+class="com-video-EpisodeDetailsBlock__content"><span\s+class=".+?">(.+?)</span></p><div',
             r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div',),
            webpage, 'description', default=None, group=1)
        if not description:
            og_desc = self._html_search_meta(
                ('description', 'og:description', 'twitter:description'), webpage)
            if og_desc:
                description = re.sub(r'''(?sx)
                    ^(.+?)(?:
                        アニメの動画を無料で見るならABEMA!|  # anime
                        等、.+  # applies for most of categories
                    )?
                ''', r'\1', og_desc)

        # canonical URL may contain series and episode number
        mobj = re.search(r's(\d+)_p(\d+)$', canonical_url)
        if mobj:
            seri = int_or_none(mobj.group(1), default=float('inf'))
            epis = int_or_none(mobj.group(2), default=float('inf'))
            info['series_number'] = seri if seri < 100 else None
            # some anime like Detective Conan (though not available in AbemaTV)
            # has more than 1000 episodes (1026 as of 2021/11/15)
            info['episode_number'] = epis if epis < 2000 else None

        is_live, m3u8_url = False, None
        if video_type == 'now-on-air':
            is_live = True
            channel_url = 'https://api.abema.io/v1/channels'
            if video_id == 'news-global':
                channel_url = update_url_query(channel_url, {'division': '1'})
            onair_channels = self._download_json(channel_url, video_id)
            for ch in onair_channels['channels']:
                if video_id == ch['id']:
                    m3u8_url = ch['playback']['hls']
                    break
            else:
                raise ExtractorError(f'Cannot find on-air {video_id} channel.', expected=True)
        elif video_type == 'episode':
            api_response = self._download_json(
                f'https://api.abema.io/v1/video/programs/{video_id}', video_id,
                note='Checking playability',
                headers=headers)
            ondemand_types = traverse_obj(api_response, ('terms', ..., 'onDemandType'), default=[])
            if 3 not in ondemand_types:
                # cannot acquire decryption key for these streams
                self.report_warning('This is a premium-only stream')

            m3u8_url = f'https://vod-abematv.akamaized.net/program/{video_id}/playlist.m3u8'
        elif video_type == 'slots':
            api_response = self._download_json(
                f'https://api.abema.io/v1/media/slots/{video_id}', video_id,
                note='Checking playability',
                headers=headers)
            if not traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False):
                self.report_warning('This is a premium-only stream')

            m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8'
        else:
            raise ExtractorError('Unreachable')

        if is_live:
            self.report_warning("This is a livestream; yt-dlp doesn't support downloading it natively, and FFmpeg cannot handle m3u8 manifests from AbemaTV")
            self.report_warning('Please consider using Streamlink to download these streams (https://github.com/streamlink/streamlink)')
        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, ext='mp4', live=is_live)

        info.update({
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'is_live': is_live,
        })
        return info

class AbemaTVTitleIE(AbemaTVBaseIE):
    _VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/]+)'

    _TESTS = [{
        'url': 'https://abema.tv/video/title/90-1597',
        'info_dict': {
            'id': '90-1597',
            'title': 'シャッフルアイランド',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://abema.tv/video/title/193-132',
        'info_dict': {
            'id': '193-132',
            'title': '真心が届く~僕とスターのオフィス・ラブ!?~',
        },
        'playlist_mincount': 16,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        playlist_title, breadcrumb = None, self._extract_breadcrumb_list(webpage, video_id)
        if breadcrumb:
            playlist_title = breadcrumb[-1]

        playlist = [
            self.url_result(urljoin('https://abema.tv/', mobj.group(1)))
            for mobj in re.finditer(r'<li\s*class=".+?EpisodeList.+?"><a\s*href="(/[^"]+?)"', webpage)]

        return self.playlist_result(playlist, playlist_title=playlist_title, playlist_id=video_id)

yt_dlp/extractor/adn.py
@@ -8,11 +8,10 @@ import os
import random

from .common import InfoExtractor
from ..aes import aes_cbc_decrypt
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
from ..compat import (
    compat_HTTPError,
    compat_b64decode,
    compat_ord,
)
from ..utils import (
    ass_subtitles_timecode,
@@ -84,14 +83,11 @@ class ADNIE(InfoExtractor):
            return None

        # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
        dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
            bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
            bytes_to_intlist(binascii.unhexlify(self._K + 'ab9f52f5baae7c72')),
            bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
        ))
        subtitles_json = self._parse_json(
            dec_subtitles[:-compat_ord(dec_subtitles[-1])].decode(),
            None, fatal=False)
        dec_subtitles = unpad_pkcs7(aes_cbc_decrypt_bytes(
            compat_b64decode(enc_subtitles[24:]),
            binascii.unhexlify(self._K + 'ab9f52f5baae7c72'),
            compat_b64decode(enc_subtitles[:24])))
        subtitles_json = self._parse_json(dec_subtitles.decode(), None, fatal=False)
        if not subtitles_json:
            return None

yt_dlp/extractor/adobepass.py
@@ -1345,6 +1345,11 @@ MSO_INFO = {
        'username_field': 'username',
        'password_field': 'password',
    },
    'Suddenlink': {
        'name': 'Suddenlink',
        'username_field': 'username',
        'password_field': 'password',
    },
}


@@ -1635,6 +1640,52 @@ class AdobePassIE(InfoExtractor):
                    urlh.geturl(), video_id, 'Sending final bookend',
                    query=hidden_data)

                post_form(mvpd_confirm_page_res, 'Confirming Login')
            elif mso_id == 'Suddenlink':
                # Suddenlink is similar to SlingTV in using a tab history count and a meta refresh,
                # but they also do a dynamic redirect using javascript that has to be followed as well
                first_bookend_page, urlh = post_form(
                    provider_redirect_page_res, 'Pressing Continue...')

                hidden_data = self._hidden_inputs(first_bookend_page)
                hidden_data['history_val'] = 1

                provider_login_redirect_page = self._download_webpage(
                    urlh.geturl(), video_id, 'Sending First Bookend',
                    query=hidden_data)

                provider_tryauth_url = self._html_search_regex(
                    r'url:\s*[\'"]([^\'"]+)', provider_login_redirect_page, 'ajaxurl')

                provider_tryauth_page = self._download_webpage(
                    provider_tryauth_url, video_id, 'Submitting TryAuth',
                    query=hidden_data)

                provider_login_page_res = self._download_webpage_handle(
                    f'https://authorize.suddenlink.net/saml/module.php/authSynacor/login.php?AuthState={provider_tryauth_page}',
                    video_id, 'Getting Login Page',
                    query=hidden_data)

                provider_association_redirect, urlh = post_form(
                    provider_login_page_res, 'Logging in', {
                        mso_info['username_field']: username,
                        mso_info['password_field']: password
                    })

                provider_refresh_redirect_url = extract_redirect_url(
                    provider_association_redirect, url=urlh.geturl())

                last_bookend_page, urlh = self._download_webpage_handle(
                    provider_refresh_redirect_url, video_id,
                    'Downloading Auth Association Redirect Page')

                hidden_data = self._hidden_inputs(last_bookend_page)
                hidden_data['history_val'] = 3

                mvpd_confirm_page_res = self._download_webpage_handle(
                    urlh.geturl(), video_id, 'Sending Final Bookend',
                    query=hidden_data)

                post_form(mvpd_confirm_page_res, 'Confirming Login')
            else:
                # Some providers (e.g. DIRECTV NOW) have another meta refresh

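`_hidden_inputs`, used twice in the Suddenlink flow above, simply harvests the hidden form fields that each bookend page expects to be posted back. A rough stand-in with plain `re` (not the real helper, which is considerably more robust):

```python
import re


def hidden_inputs(html):
    # collect <input type="hidden"> name/value pairs from a form page
    inputs = {}
    for m in re.finditer(r'<input[^>]+type=["\']hidden["\'][^>]*>', html):
        tag = m.group(0)
        name = re.search(r'name=["\']([^"\']+)', tag)
        value = re.search(r'value=["\']([^"\']*)', tag)
        if name:
            inputs[name.group(1)] = value.group(1) if value else ''
    return inputs


page = '<form><input type="hidden" name="SAMLResponse" value="..."/></form>'
data = hidden_inputs(page)
data['history_val'] = 1   # the tab-history counter Suddenlink checks
print(data)
```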
yt_dlp/extractor/afreecatv.py
@@ -416,26 +416,35 @@ class AfreecaTVLiveIE(AfreecaTVIE):

    def _real_extract(self, url):
        broadcaster_id, broadcast_no = self._match_valid_url(url).group('id', 'bno')
        password = self.get_param('videopassword')

        info = self._download_json(self._LIVE_API_URL, broadcaster_id, fatal=False,
                                   data=urlencode_postdata({'bid': broadcaster_id})) or {}
        channel_info = info.get('CHANNEL') or {}
        broadcaster_id = channel_info.get('BJID') or broadcaster_id
        broadcast_no = channel_info.get('BNO') or broadcast_no
        password_protected = channel_info.get('BPWD')
        if not broadcast_no:
            raise ExtractorError(f'Unable to extract broadcast number ({broadcaster_id} may not be live)', expected=True)
        if password_protected == 'Y' and password is None:
            raise ExtractorError(
                'This livestream is protected by a password, use the --video-password option',
                expected=True)

        formats = []
        quality_key = qualities(self._QUALITIES)
        for quality_str in self._QUALITIES:
            params = {
                'bno': broadcast_no,
                'stream_type': 'common',
                'type': 'aid',
                'quality': quality_str,
            }
            if password is not None:
                params['pwd'] = password
            aid_response = self._download_json(
                self._LIVE_API_URL, broadcast_no, fatal=False,
                data=urlencode_postdata({
                    'bno': broadcast_no,
                    'stream_type': 'common',
                    'type': 'aid',
                    'quality': quality_str,
                }),
                data=urlencode_postdata(params),
                note=f'Downloading access token for {quality_str} stream',
                errnote=f'Unable to download access token for {quality_str} stream')
            aid = traverse_obj(aid_response, ('CHANNEL', 'AID'))

yt_dlp/extractor/aliexpress.py
@@ -18,7 +18,7 @@ class AliExpressLiveIE(InfoExtractor):
        'id': '2800002704436634',
        'ext': 'mp4',
        'title': 'CASIMA7.22',
        'thumbnail': r're:http://.*\.jpg',
        'thumbnail': r're:https?://.*\.jpg',
        'uploader': 'CASIMA Official Store',
        'timestamp': 1500717600,
        'upload_date': '20170722',

yt_dlp/extractor/alsace20tv.py (new file, 87 lines)
@@ -0,0 +1,87 @@
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    clean_html,
    dict_get,
    get_element_by_class,
    int_or_none,
    unified_strdate,
    url_or_none,
)


class Alsace20TVBaseIE(InfoExtractor):
    def _extract_video(self, video_id, url=None):
        info = self._download_json(
            'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key=%s&habillage=0&mode=html' % (video_id, ),
            video_id) or {}
        title = info.get('titre')

        formats = []
        for res, fmt_url in (info.get('files') or {}).items():
            formats.extend(
                self._extract_smil_formats(fmt_url, video_id, fatal=False)
                if '/smil:_' in fmt_url
                else self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False))
        self._sort_formats(formats)

        webpage = (url and self._download_webpage(url, video_id, fatal=False)) or ''
        thumbnail = url_or_none(dict_get(info, ('image', 'preview', )) or self._og_search_thumbnail(webpage))
        upload_date = self._search_regex(r'/(\d{6})_', thumbnail, 'upload_date', default=None)
        upload_date = unified_strdate('20%s-%s-%s' % (upload_date[:2], upload_date[2:4], upload_date[4:])) if upload_date else None
        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': clean_html(get_element_by_class('wysiwyg', webpage)),
            'upload_date': upload_date,
            'thumbnail': thumbnail,
            'duration': int_or_none(self._og_search_property('video:duration', webpage) if webpage else None),
            'view_count': int_or_none(info.get('nb_vues')),
        }


class Alsace20TVIE(Alsace20TVBaseIE):
    _VALID_URL = r'https?://(?:www\.)?alsace20\.tv/(?:[\w-]+/)+[\w-]+-(?P<id>[\w]+)'
    _TESTS = [{
        'url': 'https://www.alsace20.tv/VOD/Actu/JT/Votre-JT-jeudi-3-fevrier-lyNHCXpYJh.html',
        'info_dict': {
            'id': 'lyNHCXpYJh',
            'ext': 'mp4',
            'description': 'md5:fc0bc4a0692d3d2dba4524053de4c7b7',
            'title': 'Votre JT du jeudi 3 février',
            'upload_date': '20220203',
            'thumbnail': r're:https?://.+\.jpg',
            'duration': 1073,
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        return self._extract_video(video_id, url)


class Alsace20TVEmbedIE(Alsace20TVBaseIE):
    _VALID_URL = r'https?://(?:www\.)?alsace20\.tv/emb/(?P<id>[\w]+)'
    _TESTS = [{
        'url': 'https://www.alsace20.tv/emb/lyNHCXpYJh',
        # 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb',
        'info_dict': {
            'id': 'lyNHCXpYJh',
            'ext': 'mp4',
            'title': 'Votre JT du jeudi 3 février',
            'upload_date': '20220203',
            'thumbnail': r're:https?://.+\.jpg',
            'view_count': int,
        },
        'params': {
            'format': 'bestvideo',
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        return self._extract_video(video_id)
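The upload date in `_extract_video` above is a heuristic: it is read out of the thumbnail filename. A tiny demonstration (the path shape is assumed from the regex, not taken from a real response):

```python
import re

thumbnail = 'https://www.alsace20.tv/images/vod/220203_jt.jpg'  # hypothetical
m = re.search(r'/(\d{6})_', thumbnail)
if m:
    d = m.group(1)                                   # 'YYMMDD'
    date_str = '20%s-%s-%s' % (d[:2], d[2:4], d[4:])  # '2022-02-03'
    # unified_strdate('2022-02-03') then normalizes this to '20220203'
    print(date_str)
```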
yt_dlp/extractor/ant1newsgr.py (new file, 143 lines)
@@ -0,0 +1,143 @@
# coding: utf-8
from __future__ import unicode_literals

import re
import urllib.parse

from .common import InfoExtractor
from ..utils import (
    HEADRequest,
    ExtractorError,
    determine_ext,
    scale_thumbnails_to_max_format_width,
    unescapeHTML,
)


class Ant1NewsGrBaseIE(InfoExtractor):
    def _download_and_extract_api_data(self, video_id, netloc, cid=None):
        url = f'{self.http_scheme()}//{netloc}{self._API_PATH}'
        info = self._download_json(url, video_id, query={'cid': cid or video_id})
        try:
            source = info['url']
        except KeyError:
            raise ExtractorError('no source found for %s' % video_id)
        formats, subs = (self._extract_m3u8_formats_and_subtitles(source, video_id, 'mp4')
                         if determine_ext(source) == 'm3u8' else ([{'url': source}], {}))
        self._sort_formats(formats)
        thumbnails = scale_thumbnails_to_max_format_width(
            formats, [{'url': info['thumb']}], r'(?<=/imgHandler/)\d+')
        return {
            'id': video_id,
            'title': info.get('title'),
            'thumbnails': thumbnails,
            'formats': formats,
            'subtitles': subs,
        }


class Ant1NewsGrWatchIE(Ant1NewsGrBaseIE):
    IE_NAME = 'ant1newsgr:watch'
    IE_DESC = 'ant1news.gr videos'
    _VALID_URL = r'https?://(?P<netloc>(?:www\.)?ant1news\.gr)/watch/(?P<id>\d+)/'
    _API_PATH = '/templates/data/player'

    _TESTS = [{
        'url': 'https://www.ant1news.gr/watch/1506168/ant1-news-09112021-stis-18-45',
        'md5': '95925e6b32106754235f2417e0d2dfab',
        'info_dict': {
            'id': '1506168',
            'ext': 'mp4',
            'title': 'md5:0ad00fa66ecf8aa233d26ab0dba7514a',
            'description': 'md5:18665af715a6dcfeac1d6153a44f16b0',
            'thumbnail': 'https://ant1media.azureedge.net/imgHandler/640/26d46bf6-8158-4f02-b197-7096c714b2de.jpg',
        },
    }]

    def _real_extract(self, url):
        video_id, netloc = self._match_valid_url(url).group('id', 'netloc')
        webpage = self._download_webpage(url, video_id)
        info = self._download_and_extract_api_data(video_id, netloc)
        info['description'] = self._og_search_description(webpage)
        return info


class Ant1NewsGrArticleIE(Ant1NewsGrBaseIE):
    IE_NAME = 'ant1newsgr:article'
    IE_DESC = 'ant1news.gr articles'
    _VALID_URL = r'https?://(?:www\.)?ant1news\.gr/[^/]+/article/(?P<id>\d+)/'

    _TESTS = [{
        'url': 'https://www.ant1news.gr/afieromata/article/549468/o-tzeims-mpont-sta-meteora-oi-apeiles-kai-o-xesikomos-ton-kalogeron',
        'md5': '294f18331bb516539d72d85a82887dcc',
        'info_dict': {
            'id': '_xvg/m_cmbatw=',
            'ext': 'mp4',
            'title': 'md5:a93e8ecf2e4073bfdffcb38f59945411',
            'timestamp': 1603092840,
            'upload_date': '20201019',
            'thumbnail': 'https://ant1media.azureedge.net/imgHandler/640/756206d2-d640-40e2-b201-3555abdfc0db.jpg',
        },
    }, {
        'url': 'https://ant1news.gr/Society/article/620286/symmoria-anilikon-dikigoros-thymaton-ithelan-na-toys-apoteleiosoyn',
        'info_dict': {
            'id': '620286',
            'title': 'md5:91fe569e952e4d146485740ae927662b',
        },
        'playlist_mincount': 2,
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        info = self._search_json_ld(webpage, video_id, expected_type='NewsArticle')
        embed_urls = list(Ant1NewsGrEmbedIE._extract_urls(webpage))
        if not embed_urls:
            raise ExtractorError('no videos found for %s' % video_id, expected=True)
        return self.playlist_from_matches(
            embed_urls, video_id, info.get('title'), ie=Ant1NewsGrEmbedIE.ie_key(),
            video_kwargs={'url_transparent': True, 'timestamp': info.get('timestamp')})


class Ant1NewsGrEmbedIE(Ant1NewsGrBaseIE):
    IE_NAME = 'ant1newsgr:embed'
    IE_DESC = 'ant1news.gr embedded videos'
    _BASE_PLAYER_URL_RE = r'(?:https?:)?//(?:[a-zA-Z0-9\-]+\.)?(?:antenna|ant1news)\.gr/templates/pages/player'
    _VALID_URL = rf'{_BASE_PLAYER_URL_RE}\?([^#]+&)?cid=(?P<id>[^#&]+)'
    _API_PATH = '/news/templates/data/jsonPlayer'

    _TESTS = [{
        'url': 'https://www.antenna.gr/templates/pages/player?cid=3f_li_c_az_jw_y_u=&w=670&h=377',
        'md5': 'dfc58c3a11a5a9aad2ba316ed447def3',
        'info_dict': {
            'id': '3f_li_c_az_jw_y_u=',
            'ext': 'mp4',
            'title': 'md5:a30c93332455f53e1e84ae0724f0adf7',
            'thumbnail': 'https://ant1media.azureedge.net/imgHandler/640/bbe31201-3f09-4a4e-87f5-8ad2159fffe2.jpg',
        },
    }]

    @classmethod
    def _extract_urls(cls, webpage):
        _EMBED_URL_RE = rf'{cls._BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+'
        _EMBED_RE = rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{_EMBED_URL_RE})(?P=_q1)'
        for mobj in re.finditer(_EMBED_RE, webpage):
            url = unescapeHTML(mobj.group('url'))
            if not cls.suitable(url):
                continue
            yield url

    def _real_extract(self, url):
        video_id = self._match_id(url)

        canonical_url = self._request_webpage(
            HEADRequest(url), video_id,
            note='Resolve canonical player URL',
            errnote='Could not resolve canonical player URL').geturl()
        _, netloc, _, _, query, _ = urllib.parse.urlparse(canonical_url)
        cid = urllib.parse.parse_qs(query)['cid'][0]

        return self._download_and_extract_api_data(video_id, netloc, cid=cid)
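`scale_thumbnails_to_max_format_width` is new in this release; its effect on the imgHandler URLs above can be approximated in a few lines. This is a hypothetical re-implementation of the idea, not the actual utility:

```python
import re

formats = [{'width': 640}, {'width': 1280}]
thumb = {'url': 'https://ant1media.azureedge.net/imgHandler/640/26d46bf6.jpg'}

# rewrite the width component of the thumbnail URL so it matches the
# widest available format
max_width = max(f.get('width') or 0 for f in formats)
scaled = dict(thumb, url=re.sub(r'(?<=/imgHandler/)\d+', str(max_width), thumb['url']))
print(scaled['url'])  # .../imgHandler/1280/26d46bf6.jpg
```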
yt_dlp/extractor/applepodcasts.py
@@ -3,7 +3,9 @@ from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    clean_html,
    clean_podcast_url,
    get_element_by_class,
    int_or_none,
    parse_iso8601,
    try_get,
@@ -14,16 +16,17 @@ class ApplePodcastsIE(InfoExtractor):
    _VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
        'md5': 'df02e6acb11c10e844946a39e7222b08',
        'md5': '41dc31cd650143e530d9423b6b5a344f',
        'info_dict': {
            'id': '1000482637777',
            'ext': 'mp3',
            'title': '207 - Whitney Webb Returns',
            'description': 'md5:13a73bade02d2e43737751e3987e1399',
            'description': 'md5:75ef4316031df7b41ced4e7b987f79c6',
            'upload_date': '20200705',
            'timestamp': 1593921600,
            'duration': 6425,
            'timestamp': 1593932400,
            'duration': 6454,
            'series': 'The Tim Dillon Show',
            'thumbnail': 're:.+[.](png|jpe?g|webp)',
        }
    }, {
        'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
@@ -39,24 +42,47 @@ class ApplePodcastsIE(InfoExtractor):
    def _real_extract(self, url):
        episode_id = self._match_id(url)
        webpage = self._download_webpage(url, episode_id)
        ember_data = self._parse_json(self._search_regex(
            r'id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
            webpage, 'ember data'), episode_id)
        ember_data = ember_data.get(episode_id) or ember_data
        episode = ember_data['data']['attributes']
        episode_data = {}
        ember_data = {}
        # new page type 2021-11
        amp_data = self._parse_json(self._search_regex(
            r'(?s)id="shoebox-media-api-cache-amp-podcasts"[^>]*>\s*({.+?})\s*<',
            webpage, 'AMP data', default='{}'), episode_id, fatal=False) or {}
        amp_data = try_get(amp_data,
                           lambda a: self._parse_json(
                               next(a[x] for x in iter(a) if episode_id in x),
                               episode_id),
                           dict) or {}
        amp_data = amp_data.get('d') or []
        episode_data = try_get(
            amp_data,
            lambda a: next(x for x in a
                           if x['type'] == 'podcast-episodes' and x['id'] == episode_id),
            dict)
        if not episode_data:
            # try pre 2021-11 page type: TODO: consider deleting if no longer used
            ember_data = self._parse_json(self._search_regex(
                r'(?s)id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
                webpage, 'ember data'), episode_id) or {}
            ember_data = ember_data.get(episode_id) or ember_data
            episode_data = try_get(ember_data, lambda x: x['data'], dict)
        episode = episode_data['attributes']
        description = episode.get('description') or {}

        series = None
        for inc in (ember_data.get('included') or []):
        for inc in (amp_data or ember_data.get('included') or []):
            if inc.get('type') == 'media/podcast':
                series = try_get(inc, lambda x: x['attributes']['name'])
        series = series or clean_html(get_element_by_class('podcast-header__identity', webpage))

        return {
            'id': episode_id,
            'title': episode['name'],
            'title': episode.get('name'),
            'url': clean_podcast_url(episode['assetUrl']),
            'description': description.get('standard') or description.get('short'),
            'timestamp': parse_iso8601(episode.get('releaseDateTime')),
            'duration': int_or_none(episode.get('durationInMilliseconds'), 1000),
            'series': series,
            'thumbnail': self._og_search_thumbnail(webpage),
            'vcodec': 'none',
        }

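The AMP-cache parsing above is easier to see against a concrete payload. A runnable sketch with sample data in the shape the extractor expects (not a real API response):

```python
import json

# reduced shoebox cache: keys are opaque API URLs, values are JSON strings
shoebox = {
    'podcasts/.../episodes/1000482637777': json.dumps({
        'd': [
            {'type': 'podcast-episodes', 'id': '1000482637777',
             'attributes': {'name': '207 - Whitney Webb Returns',
                            'assetUrl': 'https://example.com/ep.mp3'}},
            {'type': 'media/podcast',
             'attributes': {'name': 'The Tim Dillon Show'}},
        ],
    }),
}

episode_id = '1000482637777'
amp_data = json.loads(next(v for k, v in shoebox.items() if episode_id in k))['d']
episode = next(x for x in amp_data
               if x['type'] == 'podcast-episodes' and x['id'] == episode_id)
series = next((x['attributes']['name'] for x in amp_data
               if x.get('type') == 'media/podcast'), None)
print(episode['attributes']['name'], '/', series)
```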
yt_dlp/extractor/archiveorg.py
@@ -19,6 +19,7 @@ from ..utils import (
    get_element_by_id,
    HEADRequest,
    int_or_none,
    join_nonempty,
    KNOWN_EXTENSIONS,
    merge_dicts,
    mimetype2ext,
@@ -64,7 +65,7 @@ class ArchiveOrgIE(InfoExtractor):
        'description': 'md5:43a603fd6c5b4b90d12a96b921212b9c',
        'uploader': 'yorkmba99@hotmail.com',
        'timestamp': 1387699629,
        'upload_date': "20131222",
        'upload_date': '20131222',
    },
}, {
    'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
@@ -150,8 +151,7 @@ class ArchiveOrgIE(InfoExtractor):

        # Archive.org metadata API doesn't clearly demarcate playlist entries
        # or subtitle tracks, so we get them from the embeddable player.
        embed_page = self._download_webpage(
            'https://archive.org/embed/' + identifier, identifier)
        embed_page = self._download_webpage(f'https://archive.org/embed/{identifier}', identifier)
        playlist = self._playlist_data(embed_page)

        entries = {}
@@ -166,17 +166,17 @@ class ArchiveOrgIE(InfoExtractor):
                'thumbnails': [],
                'artist': p.get('artist'),
                'track': p.get('title'),
                'subtitles': {}}
                'subtitles': {},
            }

            for track in p.get('tracks', []):
                if track['kind'] != 'subtitles':
                    continue

                entries[p['orig']][track['label']] = {
                    'url': 'https://archive.org/' + track['file'].lstrip('/')}
                    'url': 'https://archive.org/' + track['file'].lstrip('/')
                }

        metadata = self._download_json(
            'http://archive.org/metadata/' + identifier, identifier)
        metadata = self._download_json('http://archive.org/metadata/' + identifier, identifier)
        m = metadata['metadata']
        identifier = m['identifier']

@@ -189,7 +189,7 @@ class ArchiveOrgIE(InfoExtractor):
            'license': m.get('licenseurl'),
            'release_date': unified_strdate(m.get('date')),
            'timestamp': unified_timestamp(dict_get(m, ['publicdate', 'addeddate'])),
            'webpage_url': 'https://archive.org/details/' + identifier,
            'webpage_url': f'https://archive.org/details/{identifier}',
            'location': m.get('venue'),
            'release_year': int_or_none(m.get('year'))}

@@ -207,7 +207,7 @@ class ArchiveOrgIE(InfoExtractor):
                    'discnumber': int_or_none(f.get('disc')),
                    'release_year': int_or_none(f.get('year'))})
                entry = entries[f['name']]
            elif f.get('original') in entries:
            elif traverse_obj(f, 'original', expected_type=str) in entries:
                entry = entries[f['original']]
            else:
                continue
@@ -230,13 +230,12 @@ class ArchiveOrgIE(InfoExtractor):
                'filesize': int_or_none(f.get('size')),
                'protocol': 'https'})

        # Sort available formats by filesize
        for entry in entries.values():
            entry['formats'] = list(sorted(entry['formats'], key=lambda x: x.get('filesize', -1)))
            self._sort_formats(entry['formats'])

        if len(entries) == 1:
            # If there's only one item, use it as the main info dict
            only_video = entries[list(entries.keys())[0]]
            only_video = next(iter(entries.values()))
            if entry_id:
                info = merge_dicts(only_video, info)
            else:
@@ -261,19 +260,19 @@ class ArchiveOrgIE(InfoExtractor):

class YoutubeWebArchiveIE(InfoExtractor):
    IE_NAME = 'web.archive:youtube'
    IE_DESC = 'web.archive.org saved youtube videos'
    _VALID_URL = r"""(?x)^
            (?:https?://)?web\.archive\.org/
            (?:web/)?
            (?:(?P<date>[0-9]{14})?[0-9A-Za-z_*]*/)?  # /web and the version index is optional
            (?:https?(?::|%3[Aa])//)?
            (?:
                (?:\w+\.)?youtube\.com(?::(?:80|443))?/watch(?:\.php)?(?:\?|%3[fF])(?:[^\#]+(?:&|%26))?v(?:=|%3[dD])  # Youtube URL
                |(?:wayback-fakeurl\.archive\.org/yt/)  # Or the internal fake url
            )
            (?P<id>[0-9A-Za-z_-]{11})(?:%26|\#|&|$)
            """
    IE_DESC = 'web.archive.org saved youtube videos, "ytarchive:" prefix'
    _VALID_URL = r'''(?x)(?:(?P<prefix>ytarchive:)|
            (?:https?://)?web\.archive\.org/
            (?:web/)?(?:(?P<date>[0-9]{14})?[0-9A-Za-z_*]*/)?  # /web and the version index is optional
            (?:https?(?::|%3[Aa])//)?(?:
                (?:\w+\.)?youtube\.com(?::(?:80|443))?/watch(?:\.php)?(?:\?|%3[fF])(?:[^\#]+(?:&|%26))?v(?:=|%3[dD])  # Youtube URL
                |(?:wayback-fakeurl\.archive\.org/yt/)  # Or the internal fake url
            )
        )(?P<id>[0-9A-Za-z_-]{11})
        (?(prefix)
            (?::(?P<date2>[0-9]{14}))?$|
            (?:%26|[#&]|$)
        )'''

    _TESTS = [
        {
@@ -438,7 +437,13 @@ class YoutubeWebArchiveIE(InfoExtractor):
    }, {
        'url': 'https://web.archive.org/http://www.youtube.com:80/watch?v=-05VVye-ffg',
        'only_matching': True
    }
    }, {
        'url': 'ytarchive:BaW_jenozKc:20050214000000',
        'only_matching': True
    }, {
        'url': 'ytarchive:BaW_jenozKc',
        'only_matching': True
    },
    ]
    _YT_INITIAL_DATA_RE = r'(?:(?:(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;)|%s)' % YoutubeBaseInfoExtractor._YT_INITIAL_DATA_RE
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'(?:(?:(?:window\s*\[\s*["\']ytInitialPlayerResponse["\']\s*\]|ytInitialPlayerResponse)\s*=[(\s]*({.+?})[)\s]*;)|%s)' % YoutubeBaseInfoExtractor._YT_INITIAL_PLAYER_RESPONSE_RE
@@ -484,7 +489,6 @@ class YoutubeWebArchiveIE(InfoExtractor):
            page_title, 'title', default='')

    def _extract_metadata(self, video_id, webpage):

        search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None)) if webpage else (lambda x: None))
        player_response = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE, video_id, 'initial player response') or {}
@@ -596,7 +600,7 @@ class YoutubeWebArchiveIE(InfoExtractor):

        # Prefer the new polymer UI captures as we support extracting more metadata from them
        # WBM captures seem to all switch to this layout ~July 2020
        modern_captures = list(filter(lambda x: x >= 20200701000000, all_captures))
        modern_captures = [x for x in all_captures if x >= 20200701000000]
        if modern_captures:
            capture_dates.append(modern_captures[0])
        capture_dates.append(url_date)
@@ -608,11 +612,11 @@ class YoutubeWebArchiveIE(InfoExtractor):

        # Fallbacks if any of the above fail
        capture_dates.extend([self._OLDEST_CAPTURE_DATE, self._NEWEST_CAPTURE_DATE])
        return orderedSet(capture_dates)
        return orderedSet(filter(None, capture_dates))

    def _real_extract(self, url):

        url_date, video_id = self._match_valid_url(url).groups()
        video_id, url_date, url_date_2 = self._match_valid_url(url).group('id', 'date', 'date2')
        url_date = url_date or url_date_2

        urlh = None
        try:
@@ -629,11 +633,9 @@ class YoutubeWebArchiveIE(InfoExtractor):
            raise

        capture_dates = self._get_capture_dates(video_id, int_or_none(url_date))
        self.write_debug('Captures to try: ' + ', '.join(str(i) for i in capture_dates if i is not None))
        self.write_debug('Captures to try: ' + join_nonempty(*capture_dates, delim=', '))
        info = {'id': video_id}
        for capture in capture_dates:
            if not capture:
                continue
            webpage = self._download_webpage(
                (self._WAYBACK_BASE_URL + 'http://www.youtube.com/watch?v=%s') % (capture, video_id),
                video_id=video_id, fatal=False, errnote='unable to download capture webpage (it may not be archived)',
@@ -648,7 +650,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
            info['thumbnails'] = self._extract_thumbnails(video_id)

        if urlh:
            url = compat_urllib_parse_unquote(urlh.url)
            url = compat_urllib_parse_unquote(urlh.geturl())
            video_file_url_qs = parse_qs(url)
            # Attempt to recover any ext & format info from playback url & response headers
            format = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))}

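The rewritten _VALID_URL above relies on a conditional group, `(?(prefix)...)`, so the trailing `:<date>` is only accepted in the `ytarchive:` form. A quick check of both forms against the regex from the hunk (comments stripped):

```python
import re

_VALID_URL = r'''(?x)(?:(?P<prefix>ytarchive:)|
        (?:https?://)?web\.archive\.org/
        (?:web/)?(?:(?P<date>[0-9]{14})?[0-9A-Za-z_*]*/)?
        (?:https?(?::|%3[Aa])//)?(?:
            (?:\w+\.)?youtube\.com(?::(?:80|443))?/watch(?:\.php)?(?:\?|%3[fF])(?:[^\#]+(?:&|%26))?v(?:=|%3[dD])
            |(?:wayback-fakeurl\.archive\.org/yt/)
        )
    )(?P<id>[0-9A-Za-z_-]{11})
    (?(prefix)
        (?::(?P<date2>[0-9]{14}))?$|
        (?:%26|[#&]|$)
    )'''

for u in ('ytarchive:BaW_jenozKc:20050214000000',
          'https://web.archive.org/web/20050214000000/https://www.youtube.com/watch?v=BaW_jenozKc'):
    m = re.match(_VALID_URL, u)
    print(m.group('id'), m.group('date') or m.group('date2'))
    # both print: BaW_jenozKc 20050214000000
```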
yt_dlp/extractor/arcpublishing.py
@@ -124,8 +124,7 @@ class ArcPublishingIE(InfoExtractor):
                formats.extend(smil_formats)
            elif stream_type in ('ts', 'hls'):
                m3u8_formats = self._extract_m3u8_formats(
                    s_url, uuid, 'mp4', 'm3u8' if is_live else 'm3u8_native',
                    m3u8_id='hls', fatal=False)
                    s_url, uuid, 'mp4', live=is_live, m3u8_id='hls', fatal=False)
                if all([f.get('acodec') == 'none' for f in m3u8_formats]):
                    continue
                for f in m3u8_formats:

yt_dlp/extractor/ard.py
@@ -407,8 +407,9 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
                    (?:(?:beta|www)\.)?ardmediathek\.de/
                    (?:(?P<client>[^/]+)/)?
                    (?:player|live|video|(?P<playlist>sendung|sammlung))/
                    (?:(?P<display_id>[^?#]+)/)?
                    (?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+)'''
                    (?:(?P<display_id>(?(playlist)[^?#]+?|[^?#]+))/)?
                    (?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+)
                    (?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))'''

    _TESTS = [{
        'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
@@ -436,6 +437,13 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
        'description': 'md5:39578c7b96c9fe50afdf5674ad985e6b',
        'upload_date': '20211108',
    },
}, {
    'url': 'https://www.ardmediathek.de/sendung/beforeigners/beforeigners/staffel-1/Y3JpZDovL2Rhc2Vyc3RlLmRlL2JlZm9yZWlnbmVycw/1',
    'playlist_count': 6,
    'info_dict': {
        'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL2JlZm9yZWlnbmVycw',
        'title': 'beforeigners/beforeigners/staffel-1',
    },
}, {
    'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
    'only_matching': True,
@@ -561,14 +569,15 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
                break
            pageNumber = pageNumber + 1

        return self.playlist_result(entries, playlist_title=display_id)
        return self.playlist_result(entries, playlist_id, playlist_title=display_id)

    def _real_extract(self, url):
        video_id, display_id, playlist_type, client = self._match_valid_url(url).group(
            'id', 'display_id', 'playlist', 'client')
        video_id, display_id, playlist_type, client, season_number = self._match_valid_url(url).group(
            'id', 'display_id', 'playlist', 'client', 'season')
        display_id, client = display_id or video_id, client or 'ard'

        if playlist_type:
            # TODO: Extract only specified season
            return self._ARD_extract_playlist(url, video_id, display_id, client, playlist_type)

        player_page = self._download_json(

yt_dlp/extractor/arte.py
@@ -12,6 +12,7 @@ from ..utils import (
    int_or_none,
    parse_qs,
    qualities,
    strip_or_none,
    try_get,
    unified_strdate,
    url_or_none,
@@ -253,3 +254,44 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
        title = collection.get('title')
        description = collection.get('shortDescription') or collection.get('teaserText')
        return self.playlist_result(entries, playlist_id, title, description)


class ArteTVCategoryIE(ArteTVBaseIE):
    _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$' % ArteTVBaseIE._ARTE_LANGUAGES
    _TESTS = [{
        'url': 'https://www.arte.tv/en/videos/politics-and-society/',
        'info_dict': {
            'id': 'politics-and-society',
            'title': 'Politics and society',
            'description': 'Investigative documentary series, geopolitical analysis, and international commentary',
        },
        'playlist_mincount': 13,
    },
    ]

    @classmethod
    def suitable(cls, url):
        return (
            not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE, ))
            and super(ArteTVCategoryIE, cls).suitable(url))

    def _real_extract(self, url):
        lang, playlist_id = self._match_valid_url(url).groups()
        webpage = self._download_webpage(url, playlist_id)

        items = []
        for video in re.finditer(
                r'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/%s/videos/[\w/-]+)(?P=q)' % lang,
                webpage):
            video = video.group('url')
            if video == url:
                continue
            if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE, )):
                items.append(video)

        title = (self._og_search_title(webpage, default=None)
                 or self._html_search_regex(r'<title\b[^>]*>([^<]+)</title>', default=None))
        title = strip_or_none(title.rsplit('|', 1)[0]) or self._generic_title(url)

        return self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=title,
                                          description=self._og_search_description(webpage, default=None))

yt_dlp/extractor/atvat.py
@@ -8,6 +8,7 @@ from ..utils import (
    float_or_none,
    jwt_encode_hs256,
    try_get,
    ExtractorError,
)


@@ -94,6 +95,11 @@ class ATVAtIE(InfoExtractor):
        })

        video_id, videos_data = list(videos['data'].items())[0]
        error_msg = try_get(videos_data, lambda x: x['error']['title'])
        if error_msg == 'Geo check failed':
            self.raise_geo_restricted(error_msg)
        elif error_msg:
            raise ExtractorError(error_msg)
        entries = [
            self._extract_video_info(url, contentResource[video['id']], video)
            for video in videos_data]

yt_dlp/extractor/audiomack.py
@@ -29,6 +29,7 @@ class AudiomackIE(InfoExtractor):
        }
    },
    # audiomack wrapper around soundcloud song
    # Needs new test URL.
    {
        'add_ie': ['Soundcloud'],
        'url': 'http://www.audiomack.com/song/hip-hop-daily/black-mamba-freestyle',

yt_dlp/extractor/bandcamp.py
@@ -183,6 +183,7 @@ class BandcampIE(InfoExtractor):
                'format_note': f.get('description'),
                'filesize': parse_filesize(f.get('size_mb')),
                'vcodec': 'none',
                'acodec': format_id.split('-')[0],
            })

        self._sort_formats(formats)
@@ -212,7 +213,7 @@ class BandcampIE(InfoExtractor):

class BandcampAlbumIE(BandcampIE):
    IE_NAME = 'Bandcamp:album'
    _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?!/music)(?:/album/(?P<id>[^/?#&]+))?'
    _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com/album/(?P<id>[^/?#&]+)'

    _TESTS = [{
        'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
@@ -257,14 +258,6 @@ class BandcampAlbumIE(BandcampIE):
            'id': 'hierophany-of-the-open-grave',
        },
        'playlist_mincount': 9,
    }, {
        'url': 'http://dotscale.bandcamp.com',
        'info_dict': {
            'title': 'Loom',
            'id': 'dotscale',
            'uploader_id': 'dotscale',
        },
        'playlist_mincount': 7,
    }, {
        # with escaped quote in title
        'url': 'https://jstrecords.bandcamp.com/album/entropy-ep',
@@ -391,41 +384,63 @@ class BandcampWeeklyIE(BandcampIE):
    }


class BandcampMusicIE(InfoExtractor):
    _VALID_URL = r'https?://(?P<id>[^/]+)\.bandcamp\.com/music'
class BandcampUserIE(InfoExtractor):
    IE_NAME = 'Bandcamp:user'
    _VALID_URL = r'https?://(?!www\.)(?P<id>[^.]+)\.bandcamp\.com(?:/music)?/?(?:[#?]|$)'

    _TESTS = [{
        # Type 1 Bandcamp user page.
        'url': 'https://adrianvonziegler.bandcamp.com',
        'info_dict': {
            'id': 'adrianvonziegler',
            'title': 'Discography of adrianvonziegler',
        },
        'playlist_mincount': 23,
    }, {
        # Bandcamp user page with only one album
        'url': 'http://dotscale.bandcamp.com',
        'info_dict': {
            'id': 'dotscale',
            'title': 'Discography of dotscale'
        },
        'playlist_count': 1,
    }, {
        # Type 2 Bandcamp user page.
        'url': 'https://nightcallofficial.bandcamp.com',
        'info_dict': {
            'id': 'nightcallofficial',
            'title': 'Discography of nightcallofficial',
        },
        'playlist_count': 4,
    }, {
        'url': 'https://steviasphere.bandcamp.com/music',
        'playlist_mincount': 47,
        'info_dict': {
            'id': 'steviasphere',
            'title': 'Discography of steviasphere',
        },
    }, {
        'url': 'https://coldworldofficial.bandcamp.com/music',
        'playlist_mincount': 10,
        'info_dict': {
            'id': 'coldworldofficial',
            'title': 'Discography of coldworldofficial',
        },
    }, {
        'url': 'https://nuclearwarnowproductions.bandcamp.com/music',
        'playlist_mincount': 399,
        'info_dict': {
            'id': 'nuclearwarnowproductions',
            'title': 'Discography of nuclearwarnowproductions',
        },
    }
    ]

    _TYPE_IE_DICT = {
        'album': BandcampAlbumIE.ie_key(),
        'track': BandcampIE.ie_key()
    }
    }]

    def _real_extract(self, url):
        id = self._match_id(url)
        webpage = self._download_webpage(url, id)
        items = re.findall(r'href\=\"\/(?P<path>(?P<type>album|track)+/[^\"]+)', webpage)
        entries = [
            self.url_result(
                f'https://{id}.bandcamp.com/{item[0]}',
                ie=self._TYPE_IE_DICT[item[1]])
            for item in items]
        return self.playlist_result(entries, id)
        uploader = self._match_id(url)
        webpage = self._download_webpage(url, uploader)

        discography_data = (re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\']([^"\']+)', webpage)
                            or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))

        return self.playlist_from_matches(
            discography_data, uploader, f'Discography of {uploader}', getter=lambda x: urljoin(url, x))

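The new discography regex can be exercised against a trimmed sample of the markup it targets (the markup shape is assumed from the pattern itself, not from a real page):

```python
import re
from urllib.parse import urljoin

webpage = '''
<li data-item-id="album-123"> <a href="/album/jazz-format-mixtape-vol-1">...</a></li>
<li data-item-id="track-456"> <a href="/track/dream-baby-dream">...</a></li>
'''
url = 'https://blazo.bandcamp.com/music'

links = re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\']([^"\']+)', webpage)
print([urljoin(url, x) for x in links])
# ['https://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
#  'https://blazo.bandcamp.com/track/dream-baby-dream']
```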
@@ -11,6 +11,7 @@ from ..compat import (
|
||||
compat_etree_Element,
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -38,7 +39,7 @@ from ..utils import (
|
||||
class BBCCoUkIE(InfoExtractor):
|
||||
IE_NAME = 'bbc.co.uk'
|
||||
IE_DESC = 'BBC iPlayer'
|
||||
_ID_REGEX = r'(?:[pbm][\da-z]{7}|w[\da-z]{7,14})'
|
||||
_ID_REGEX = r'(?:[pbml][\da-z]{7}|w[\da-z]{7,14})'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?bbc\.co\.uk/
|
||||
@@ -394,9 +395,17 @@ class BBCCoUkIE(InfoExtractor):
formats.extend(self._extract_mpd_formats(
href, programme_id, mpd_id=format_id, fatal=False))
elif transfer_format == 'hls':
formats.extend(self._extract_m3u8_formats(
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
m3u8_id=format_id, fatal=False))
# TODO: let expected_status be passed into _extract_xxx_formats() instead
try:
fmts = self._extract_m3u8_formats(
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
m3u8_id=format_id, fatal=False)
except ExtractorError as e:
if not (isinstance(e.exc_info[1], compat_urllib_error.HTTPError)
and e.exc_info[1].code in (403, 404)):
raise
fmts = []
formats.extend(fmts)
elif transfer_format == 'hds':
formats.extend(self._extract_f4m_formats(
href, programme_id, f4m_id=format_id, fatal=False))
@@ -784,21 +793,33 @@ class BBCIE(BBCCoUkIE):
'timestamp': 1437785037,
'upload_date': '20150725',
},
}, {
# video with window.__INITIAL_DATA__ and value as JSON string
'url': 'https://www.bbc.com/news/av/world-europe-59468682',
'info_dict': {
'id': 'p0b71qth',
'ext': 'mp4',
'title': 'Why France is making this woman a national hero',
'description': 'md5:7affdfab80e9c3a1f976230a1ff4d5e4',
'thumbnail': r're:https?://.+/.+\.jpg',
'timestamp': 1638230731,
'upload_date': '20211130',
},
}, {
# single video article embedded with data-media-vpid
'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
'only_matching': True,
}, {
# bbcthreeConfig
'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
'info_dict': {
'id': 'p06556y7',
'ext': 'mp4',
'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
'description': 'md5:4b7dfd063d5a789a1512e99662be3ddd',
'title': 'Things Not To Say to people that live on council estates',
'description': "From being labelled a 'chav', to the presumption that they're 'scroungers', people who live on council estates encounter all kinds of prejudices and false assumptions about themselves, their families, and their lifestyles. Here, eight people discuss the common statements, misconceptions, and clichés that they're tired of hearing.",
'duration': 360,
'thumbnail': r're:https?://.+/.+\.jpg',
},
'params': {
'skip_download': True,
}
}, {
# window.__PRELOADED_STATE__
'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
@@ -1171,9 +1192,16 @@ class BBCIE(BBCCoUkIE):
return self.playlist_result(
entries, playlist_id, playlist_title, playlist_description)

initial_data = self._parse_json(self._search_regex(
r'window\.__INITIAL_DATA__\s*=\s*({.+?});', webpage,
'preload state', default='{}'), playlist_id, fatal=False)
initial_data = self._search_regex(
r'window\.__INITIAL_DATA__\s*=\s*("{.+?}")\s*;', webpage,
'quoted preload state', default=None)
if initial_data is None:
initial_data = self._search_regex(
r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage,
'preload state', default={})
else:
initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
if initial_data:
def parse_media(media):
if not media:
@@ -1214,7 +1242,10 @@ class BBCIE(BBCCoUkIE):
if name == 'media-experience':
parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
elif name == 'article':
for block in (try_get(resp, lambda x: x['data']['blocks'], list) or []):
for block in (try_get(resp,
(lambda x: x['data']['blocks'],
lambda x: x['data']['content']['model']['blocks'],),
list) or []):
if block.get('type') != 'media':
continue
parse_media(block.get('model'))
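The hunk above makes the extractor accept window.__INITIAL_DATA__ both as a bare JSON object and as a JSON-encoded string (which has to be decoded twice). A minimal standalone sketch of that double-decoding idea, stdlib only; the regexes mirror the ones in the diff and the sample payload is illustrative:

import json
import re

def extract_initial_data(webpage):
    # Quoted form: window.__INITIAL_DATA__ = "{\"key\": ...}"; decode twice
    quoted = re.search(r'window\.__INITIAL_DATA__\s*=\s*("{.+?}")\s*;', webpage)
    if quoted:
        return json.loads(json.loads(quoted.group(1)))
    # Bare form: window.__INITIAL_DATA__ = {...};
    bare = re.search(r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage)
    return json.loads(bare.group(1)) if bare else None

print(extract_initial_data('window.__INITIAL_DATA__ = "{\\"a\\": 1}";'))  # {'a': 1}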
@@ -1,32 +1,45 @@
from __future__ import unicode_literals

from .common import InfoExtractor
from ..compat import (
compat_str,
)

from ..utils import (
int_or_none,
parse_qs,
traverse_obj,
try_get,
unified_timestamp,
)


class BeegIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?beeg\.(?:com|porn(?:/video)?)/(?P<id>\d+)'
_VALID_URL = r'https?://(?:www\.)?beeg\.(?:com(?:/video)?)/-?(?P<id>\d+)'
_TESTS = [{
# api/v6 v1
'url': 'http://beeg.com/5416503',
'md5': 'a1a1b1a8bc70a89e49ccfd113aed0820',
'url': 'https://beeg.com/-0983946056129650',
'md5': '51d235147c4627cfce884f844293ff88',
'info_dict': {
'id': '5416503',
'id': '0983946056129650',
'ext': 'mp4',
'title': 'Sultry Striptease',
'description': 'md5:d22219c09da287c14bed3d6c37ce4bc2',
'timestamp': 1391813355,
'upload_date': '20140207',
'duration': 383,
'title': 'sucked cock and fucked in a private plane',
'duration': 927,
'tags': list,
'age_limit': 18,
'upload_date': '20220131',
'timestamp': 1643656455,
'display_id': 2540839,
}
}, {
'url': 'https://beeg.com/-0599050563103750?t=4-861',
'md5': 'bd8b5ea75134f7f07fad63008db2060e',
'info_dict': {
'id': '0599050563103750',
'ext': 'mp4',
'title': 'Bad Relatives',
'duration': 2060,
'tags': list,
'age_limit': 18,
'description': 'md5:b4fc879a58ae6c604f8f259155b7e3b9',
'timestamp': 1643623200,
'display_id': 2569965,
'upload_date': '20220131',
}
}, {
# api/v6 v2
@@ -36,12 +49,6 @@ class BeegIE(InfoExtractor):
# api/v6 v2 w/o t
'url': 'https://beeg.com/1277207756',
'only_matching': True,
}, {
'url': 'https://beeg.porn/video/5416503',
'only_matching': True,
}, {
'url': 'https://beeg.porn/5416503',
'only_matching': True,
}]

def _real_extract(self, url):
@@ -49,68 +56,38 @@ class BeegIE(InfoExtractor):

webpage = self._download_webpage(url, video_id)

beeg_version = self._search_regex(
r'beeg_version\s*=\s*([\da-zA-Z_-]+)', webpage, 'beeg version',
default='1546225636701')
video = self._download_json(
'https://store.externulls.com/facts/file/%s' % video_id,
video_id, 'Downloading JSON for %s' % video_id)

if len(video_id) >= 10:
query = {
'v': 2,
}
qs = parse_qs(url)
t = qs.get('t', [''])[0].split('-')
if len(t) > 1:
query.update({
's': t[0],
'e': t[1],
})
else:
query = {'v': 1}
fc_facts = video.get('fc_facts')
first_fact = {}
for fact in fc_facts:
if not first_fact or try_get(fact, lambda x: x['id'] < first_fact['id']):
first_fact = fact

for api_path in ('', 'api.'):
video = self._download_json(
'https://%sbeeg.com/api/v6/%s/video/%s'
% (api_path, beeg_version, video_id), video_id,
fatal=api_path == 'api.', query=query)
if video:
break
resources = traverse_obj(video, ('file', 'hls_resources')) or first_fact.get('hls_resources')

formats = []
for format_id, video_url in video.items():
if not video_url:
for format_id, video_uri in resources.items():
if not video_uri:
continue
height = self._search_regex(
r'^(\d+)[pP]$', format_id, 'height', default=None)
if not height:
continue
formats.append({
'url': self._proto_relative_url(
video_url.replace('{DATA_MARKERS}', 'data=pc_XX__%s_0' % beeg_version), 'https:'),
'format_id': format_id,
'height': int(height),
})
height = int_or_none(self._search_regex(r'fl_cdn_(\d+)', format_id, 'height', default=None))
current_formats = self._extract_m3u8_formats(f'https://video.beeg.com/{video_uri}', video_id, ext='mp4', m3u8_id=str(height))
for f in current_formats:
f['height'] = height
formats.extend(current_formats)

self._sort_formats(formats)

title = video['title']
video_id = compat_str(video.get('id') or video_id)
display_id = video.get('code')
description = video.get('desc')
series = video.get('ps_name')

timestamp = unified_timestamp(video.get('date'))
duration = int_or_none(video.get('duration'))

tags = [tag.strip() for tag in video['tags'].split(',')] if video.get('tags') else None

return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
'series': series,
'timestamp': timestamp,
'duration': duration,
'tags': tags,
'display_id': first_fact.get('id'),
'title': traverse_obj(video, ('file', 'stuff', 'sf_name')),
'description': traverse_obj(video, ('file', 'stuff', 'sf_story')),
'timestamp': unified_timestamp(first_fact.get('fc_created')),
'duration': int_or_none(traverse_obj(video, ('file', 'fl_duration'))),
'tags': traverse_obj(video, ('tags', ..., 'tg_name')),
'formats': formats,
'age_limit': self._rta_search(webpage),
}
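The clip support above comes from an optional ?t=start-end query parameter. A stdlib-only sketch of that query handling (yt-dlp's parse_qs accepts a full URL, while the stdlib version needs the query string split out first); the URL is the second test URL from the diff:

from urllib.parse import parse_qs, urlparse

url = 'https://beeg.com/-0599050563103750?t=4-861'
query = {'v': 2}
t = parse_qs(urlparse(url).query).get('t', [''])[0].split('-')
if len(t) > 1:
    query.update({'s': t[0], 'e': t[1]})  # clip start/end in seconds
print(query)  # {'v': 2, 's': '4', 'e': '861'}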
yt_dlp/extractor/bigo.py (new file, 59 lines)
@@ -0,0 +1,59 @@
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import ExtractorError, urlencode_postdata


class BigoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bigo\.tv/(?:[a-z]{2,}/)?(?P<id>[^/]+)'

_TESTS = [{
'url': 'https://www.bigo.tv/ja/221338632',
'info_dict': {
'id': '6576287577575737440',
'title': '土よ〜💁‍♂️ 休憩室/REST room',
'thumbnail': r're:https?://.+',
'uploader': '✨Shin💫',
'uploader_id': '221338632',
'is_live': True,
},
'skip': 'livestream',
}, {
'url': 'https://www.bigo.tv/th/Tarlerm1304',
'only_matching': True,
}, {
'url': 'https://bigo.tv/115976881',
'only_matching': True,
}]

def _real_extract(self, url):
user_id = self._match_id(url)

info_raw = self._download_json(
'https://bigo.tv/studio/getInternalStudioInfo',
user_id, data=urlencode_postdata({'siteId': user_id}))

if not isinstance(info_raw, dict):
raise ExtractorError('Received invalid JSON data')
if info_raw.get('code'):
raise ExtractorError(
'Bigo says: %s (code %s)' % (info_raw.get('msg'), info_raw.get('code')), expected=True)
info = info_raw.get('data') or {}

if not info.get('alive'):
raise ExtractorError('This user is offline.', expected=True)

return {
'id': info.get('roomId') or user_id,
'title': info.get('roomTopic') or info.get('nick_name') or user_id,
'formats': [{
'url': info.get('hls_src'),
'ext': 'mp4',
'protocol': 'm3u8',
}],
'thumbnail': info.get('snapshot'),
'uploader': info.get('nick_name'),
'uploader_id': user_id,
'is_live': True,
}
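The only helper the POST above leans on is urlencode_postdata, which form-encodes a dict into a bytes body. A rough stdlib equivalent, shown only to illustrate what that call sends:

from urllib.parse import urlencode

def urlencode_postdata(data):
    # Essentially what yt-dlp's helper does: form-encode, then to bytes
    return urlencode(data).encode('ascii')

print(urlencode_postdata({'siteId': '221338632'}))  # b'siteId=221338632'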
@@ -17,9 +17,9 @@ from ..utils import (
ExtractorError,
int_or_none,
float_or_none,
mimetype2ext,
parse_iso8601,
traverse_obj,
try_get,
parse_count,
smuggle_url,
srt_subtitles_timecode,
@@ -52,16 +52,14 @@ class BiliBiliIE(InfoExtractor):
'url': 'http://www.bilibili.com/video/av1074402/',
'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
'info_dict': {
'id': '1074402',
'ext': 'flv',
'id': '1074402_part1',
'ext': 'mp4',
'title': '【金坷垃】金泡沫',
'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
'duration': 308.067,
'timestamp': 1398012678,
'upload_date': '20140420',
'thumbnail': r're:^https?://.+\.jpg',
'uploader': '菊子桑',
'uploader_id': '156160',
'uploader': '菊子桑',
'upload_date': '20140420',
'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
'timestamp': 1398012678,
},
}, {
# Tested in BiliBiliBangumiIE
@@ -75,49 +73,27 @@ class BiliBiliIE(InfoExtractor):
'url': 'http://bangumi.bilibili.com/anime/5802/play#100643',
'md5': '3f721ad1e75030cc06faf73587cfec57',
'info_dict': {
'id': '100643',
'id': '100643_part1',
'ext': 'mp4',
'title': 'CHAOS;CHILD',
'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...',
},
'skip': 'Geo-restricted to China',
}, {
# Title with double quotes
'url': 'http://www.bilibili.com/video/av8903802/',
'info_dict': {
'id': '8903802',
'id': '8903802_part1',
'ext': 'mp4',
'title': '阿滴英文|英文歌分享#6 "Closer',
'upload_date': '20170301',
'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
'timestamp': 1488382634,
'uploader_id': '65880958',
'uploader': '阿滴英文',
},
'params': {
'skip_download': True,
},
'playlist': [{
'info_dict': {
'id': '8903802_part1',
'ext': 'flv',
'title': '阿滴英文|英文歌分享#6 "Closer',
'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
'uploader': '阿滴英文',
'uploader_id': '65880958',
'timestamp': 1488382634,
'upload_date': '20170301',
},
'params': {
'skip_download': True,
},
}, {
'info_dict': {
'id': '8903802_part2',
'ext': 'flv',
'title': '阿滴英文|英文歌分享#6 "Closer',
'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
'uploader': '阿滴英文',
'uploader_id': '65880958',
'timestamp': 1488382634,
'upload_date': '20170301',
},
'params': {
'skip_download': True,
},
}]
}, {
# new BV video id format
'url': 'https://www.bilibili.com/video/BV1JE411F741',
@@ -152,6 +128,7 @@ class BiliBiliIE(InfoExtractor):
av_id, bv_id = self._get_video_id_set(video_id, mobj.group('id_bv') is not None)
video_id = av_id

info = {}
anime_id = mobj.group('anime_id')
page_id = mobj.group('page')
webpage = self._download_webpage(url, video_id)
@@ -203,66 +180,95 @@ class BiliBiliIE(InfoExtractor):
}
headers.update(self.geo_verification_headers())

video_info = self._parse_json(
self._search_regex(r'window.__playinfo__\s*=\s*({.+?})</script>', webpage, 'video info', default=None) or '{}',
video_id, fatal=False)
video_info = video_info.get('data') or {}

durl = traverse_obj(video_info, ('dash', 'video'))
audios = traverse_obj(video_info, ('dash', 'audio')) or []
entries = []

RENDITIONS = ('qn=80&quality=80&type=', 'quality=2&type=mp4')
for num, rendition in enumerate(RENDITIONS, start=1):
payload = 'appkey=%s&cid=%s&otype=json&%s' % (self._APP_KEY, cid, rendition)
sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()

video_info = self._download_json(
'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (payload, sign),
video_id, note='Downloading video info page',
headers=headers, fatal=num == len(RENDITIONS))

if not video_info:
continue
video_info = self._download_json(
'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (payload, sign),
video_id, note='Downloading video info page',
headers=headers, fatal=num == len(RENDITIONS))
if not video_info:
continue

if 'durl' not in video_info:
if not durl and 'durl' not in video_info:
if num < len(RENDITIONS):
continue
self._report_error(video_info)

for idx, durl in enumerate(video_info['durl']):
formats = [{
'url': durl['url'],
'filesize': int_or_none(durl['size']),
}]
for backup_url in durl.get('backup_url', []):
formats = []
for idx, durl in enumerate(durl or video_info['durl']):
formats.append({
'url': durl.get('baseUrl') or durl.get('base_url') or durl.get('url'),
'ext': mimetype2ext(durl.get('mimeType') or durl.get('mime_type')),
'fps': int_or_none(durl.get('frameRate') or durl.get('frame_rate')),
'width': int_or_none(durl.get('width')),
'height': int_or_none(durl.get('height')),
'vcodec': durl.get('codecs'),
'acodec': 'none' if audios else None,
'tbr': float_or_none(durl.get('bandwidth'), scale=1000),
'filesize': int_or_none(durl.get('size')),
})
for backup_url in traverse_obj(durl, 'backup_url', expected_type=list) or []:
formats.append({
'url': backup_url,
# backup URLs have lower priorities
'quality': -2 if 'hd.mp4' in backup_url else -3,
})

for a_format in formats:
a_format.setdefault('http_headers', {}).update({
'Referer': url,
for audio in audios:
formats.append({
'url': audio.get('baseUrl') or audio.get('base_url') or audio.get('url'),
'ext': mimetype2ext(audio.get('mimeType') or audio.get('mime_type')),
'fps': int_or_none(audio.get('frameRate') or audio.get('frame_rate')),
'width': int_or_none(audio.get('width')),
'height': int_or_none(audio.get('height')),
'acodec': audio.get('codecs'),
'vcodec': 'none',
'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
'filesize': int_or_none(audio.get('size'))
})
for backup_url in traverse_obj(audio, 'backup_url', expected_type=list) or []:
formats.append({
'url': backup_url,
# backup URLs have lower priorities
'quality': -3,
})

self._sort_formats(formats)

entries.append({
'id': '%s_part%s' % (video_id, idx),
'duration': float_or_none(durl.get('length'), 1000),
'formats': formats,
})
info.update({
'id': video_id,
'duration': float_or_none(durl.get('length'), 1000),
'formats': formats,
'http_headers': {
'Referer': url,
},
})
break

title = self._html_search_regex(
(r'<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
r'(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title',
group='title')
self._sort_formats(formats)

title = self._html_search_regex((
r'<h1[^>]+title=(["\'])(?P<content>[^"\']+)',
r'(?s)<h1[^>]*>(?P<content>.+?)</h1>',
self._meta_regex('title')
), webpage, 'title', group='content', fatal=False)

# Get part title for anthologies
if page_id is not None:
# TODO: The json is already downloaded by _extract_anthology_entries. Don't redownload for each video
part_title = try_get(
self._download_json(
f'https://api.bilibili.com/x/player/pagelist?bvid={bv_id}&jsonp=jsonp',
video_id, note='Extracting videos in anthology'),
lambda x: x['data'][int(page_id) - 1]['part'])
title = part_title or title
# TODO: The json is already downloaded by _extract_anthology_entries. Don't redownload for each video.
part_info = traverse_obj(self._download_json(
f'https://api.bilibili.com/x/player/pagelist?bvid={bv_id}&jsonp=jsonp',
video_id, note='Extracting videos in anthology'), 'data', expected_type=list)
title = title if len(part_info) == 1 else traverse_obj(part_info, (int(page_id) - 1, 'part')) or title

description = self._html_search_meta('description', webpage)
timestamp = unified_timestamp(self._html_search_regex(
@@ -272,15 +278,15 @@ class BiliBiliIE(InfoExtractor):
thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage)

# TODO 'view_count' requires deobfuscating Javascript
info = {
'id': str(video_id) if page_id is None else '%s_part%s' % (video_id, page_id),
info.update({
'id': f'{video_id}_part{page_id or 1}',
'cid': cid,
'title': title,
'description': description,
'timestamp': timestamp,
'thumbnail': thumbnail,
'duration': float_or_none(video_info.get('timelength'), scale=1000),
}
})

uploader_mobj = re.search(
r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]*>\s*(?P<name>[^<]+?)\s*<',
@@ -301,7 +307,7 @@ class BiliBiliIE(InfoExtractor):
video_id, fatal=False, note='Downloading tags'), ('data', ..., 'tag_name')),
}

entries[0]['subtitles'] = {
info['subtitles'] = {
'danmaku': [{
'ext': 'xml',
'url': f'https://comment.bilibili.com/{cid}.xml',
@@ -336,12 +342,10 @@ class BiliBiliIE(InfoExtractor):
entry['id'] = '%s_part%d' % (video_id, (idx + 1))

return {
'_type': 'multi_video',
'id': str(video_id),
'bv_id': bv_id,
'title': title,
'description': description,
'entries': entries,
**info, **top_level_info
}

@@ -482,9 +486,9 @@ class BilibiliChannelIE(InfoExtractor):
data = self._download_json(
self._API_URL % (list_id, page_num), list_id, note=f'Downloading page {page_num}')['data']

max_count = max_count or try_get(data, lambda x: x['page']['count'])
max_count = max_count or traverse_obj(data, ('page', 'count'))

entries = try_get(data, lambda x: x['list']['vlist'])
entries = traverse_obj(data, ('list', 'vlist'))
if not entries:
return
for entry in entries:
@@ -522,7 +526,7 @@ class BilibiliCategoryIE(InfoExtractor):
api_url, query, query={'Search_key': query, 'pn': page_num},
note='Extracting results from page %s of %s' % (page_num, num_pages))

video_list = try_get(parsed_json, lambda x: x['data']['archives'], list)
video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
if not video_list:
raise ExtractorError('Failed to retrieve video list for page %d' % page_num)

@@ -552,7 +556,7 @@ class BilibiliCategoryIE(InfoExtractor):

api_url = 'https://api.bilibili.com/x/web-interface/newlist?rid=%d&type=1&ps=20&jsonp=jsonp' % rid_value
page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
page_data = try_get(page_json, lambda x: x['data']['page'], dict)
page_data = traverse_obj(page_json, ('data', 'page'), expected_type=dict)
count, size = int_or_none(page_data.get('count')), int_or_none(page_data.get('size'))
if count is None or not size:
raise ExtractorError('Failed to calculate either page count or size')
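Several hunks above swap try_get lambdas for traverse_obj paths; both fail soft (returning None rather than raising) when a key is missing. A small illustration of the equivalence, with a made-up payload:

from yt_dlp.utils import traverse_obj, try_get

data = {'page': {'count': 42}}
assert try_get(data, lambda x: x['page']['count']) == 42
assert traverse_obj(data, ('page', 'count')) == 42
# Missing keys return None in both styles instead of raising KeyError
assert try_get(data, lambda x: x['list']['vlist']) is None
assert traverse_obj(data, ('list', 'vlist')) is None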
@@ -3,27 +3,28 @@ from __future__ import unicode_literals

from .common import InfoExtractor
from .vk import VKIE
from ..compat import (
compat_b64decode,
compat_urllib_parse_unquote,
from ..compat import compat_b64decode
from ..utils import (
int_or_none,
js_to_json,
traverse_obj,
unified_timestamp,
)
from ..utils import int_or_none


class BIQLEIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?biqle\.(?:com|org|ru)/watch/(?P<id>-?\d+_\d+)'
_TESTS = [{
# Youtube embed
'url': 'https://biqle.ru/watch/-115995369_456239081',
'md5': '97af5a06ee4c29bbf9c001bdb1cf5c06',
'url': 'https://biqle.ru/watch/-2000421746_85421746',
'md5': 'ae6ef4f04d19ac84e4658046d02c151c',
'info_dict': {
'id': '8v4f-avW-VI',
'id': '-2000421746_85421746',
'ext': 'mp4',
'title': "PASSE-PARTOUT - L'ete c'est fait pour jouer",
'description': 'Passe-Partout',
'uploader_id': 'mrsimpsonstef3',
'uploader': 'Phanolito',
'upload_date': '20120822',
'title': 'Forsaken By Hope Studio Clip',
'description': 'Forsaken By Hope Studio Clip — Смотреть онлайн',
'upload_date': '19700101',
'thumbnail': r're:https://[^/]+/impf/7vN3ACwSTgChP96OdOfzFjUCzFR6ZglDQgWsIw/KPaACiVJJxM\.jpg\?size=800x450&quality=96&keep_aspect_ratio=1&background=000000&sign=b48ea459c4d33dbcba5e26d63574b1cb&type=video_thumb',
'timestamp': 0,
},
}, {
'url': 'http://biqle.org/watch/-44781847_168547604',
@@ -32,53 +33,62 @@ class BIQLEIE(InfoExtractor):
'id': '-44781847_168547604',
'ext': 'mp4',
'title': 'Ребенок в шоке от автоматической мойки',
'description': 'Ребенок в шоке от автоматической мойки — Смотреть онлайн',
'timestamp': 1396633454,
'uploader': 'Dmitry Kotov',
'upload_date': '20140404',
'uploader_id': '47850140',
'thumbnail': r're:https://[^/]+/c535507/u190034692/video/l_b84df002\.jpg',
},
}]

def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
embed_url = self._proto_relative_url(self._search_regex(
r'<iframe.+?src="((?:https?:)?//(?:daxab\.com|dxb\.to|[^/]+/player)/[^"]+)".*?></iframe>',
webpage, 'embed url'))

title = self._html_search_meta('name', webpage, 'Title', fatal=False)
timestamp = unified_timestamp(self._html_search_meta('uploadDate', webpage, 'Upload Date', default=None))
description = self._html_search_meta('description', webpage, 'Description', default=None)

global_embed_url = self._search_regex(
r'<script[^<]+?window.globEmbedUrl\s*=\s*\'((?:https?:)?//(?:daxab\.com|dxb\.to|[^/]+/player)/[^\']+)\'',
webpage, 'global Embed url')
hash = self._search_regex(
r'<script id="data-embed-video[^<]+?hash: "([^"]+)"[^<]*</script>', webpage, 'Hash')

embed_url = global_embed_url + hash

if VKIE.suitable(embed_url):
return self.url_result(embed_url, VKIE.ie_key(), video_id)

embed_page = self._download_webpage(
embed_url, video_id, headers={'Referer': url})
video_ext = self._get_cookies(embed_url).get('video_ext')
if video_ext:
video_ext = compat_urllib_parse_unquote(video_ext.value)
if not video_ext:
video_ext = compat_b64decode(self._search_regex(
r'video_ext\s*:\s*[\'"]([A-Za-z0-9+/=]+)',
embed_page, 'video_ext')).decode()
video_id, sig, _, access_token = video_ext.split(':')
embed_url, video_id, 'Downloading embed webpage', headers={'Referer': url})

glob_params = self._parse_json(self._search_regex(
r'<script id="globParams">[^<]*window.globParams = ([^;]+);[^<]+</script>',
embed_page, 'Global Parameters'), video_id, transform_source=js_to_json)
host_name = compat_b64decode(glob_params['server'][::-1]).decode()

item = self._download_json(
'https://api.vk.com/method/video.get', video_id,
headers={'User-Agent': 'okhttp/3.4.1'}, query={
'access_token': access_token,
'sig': sig,
'v': 5.44,
f'https://{host_name}/method/video.get/{video_id}', video_id,
headers={'Referer': url}, query={
'token': glob_params['video']['access_token'],
'videos': video_id,
'ckey': glob_params['c_key'],
'credentials': glob_params['video']['credentials'],
})['response']['items'][0]
title = item['title']

formats = []
for f_id, f_url in item.get('files', {}).items():
if f_id == 'external':
return self.url_result(f_url)
ext, height = f_id.split('_')
formats.append({
'format_id': height + 'p',
'url': f_url,
'height': int_or_none(height),
'ext': ext,
})
height_extra_key = traverse_obj(glob_params, ('video', 'partial', 'quality', height))
if height_extra_key:
formats.append({
'format_id': f'{height}p',
'url': f'https://{host_name}/{f_url[8:]}&videos={video_id}&extra_key={height_extra_key}',
'height': int_or_none(height),
'ext': ext,
})
self._sort_formats(formats)

thumbnails = []
@@ -96,10 +106,9 @@ class BIQLEIE(InfoExtractor):
'title': title,
'formats': formats,
'comment_count': int_or_none(item.get('comments')),
'description': item.get('description'),
'description': description,
'duration': int_or_none(item.get('duration')),
'thumbnails': thumbnails,
'timestamp': int_or_none(item.get('date')),
'uploader': item.get('owner_id'),
'timestamp': timestamp,
'view_count': int_or_none(item.get('views')),
}
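The rewritten extractor recovers the player hostname from globParams, where it is stored base64-encoded and reversed. The decoding step in isolation (the sample blob is constructed for this example, not taken from the site):

import base64

def decode_server(blob):
    # Undo the reversal, then base64-decode to get the hostname
    return base64.b64decode(blob[::-1]).decode()

# 't92YuIWY4FGZ' is base64.b64encode(b'daxab.com') reversed
print(decode_server('t92YuIWY4FGZ'))  # daxab.com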
yt_dlp/extractor/caltrans.py (new file, 41 lines)
@@ -0,0 +1,41 @@
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor


class CaltransIE(InfoExtractor):
_VALID_URL = r'https?://(?:[^/]+\.)?ca\.gov/vm/loc/[^/]+/(?P<id>[a-z0-9_]+)\.htm'
_TEST = {
'url': 'https://cwwp2.dot.ca.gov/vm/loc/d3/hwy50at24th.htm',
'info_dict': {
'id': 'hwy50at24th',
'ext': 'ts',
'title': 'US-50 : Sacramento : Hwy 50 at 24th',
'live_status': 'is_live',
'thumbnail': 'https://cwwp2.dot.ca.gov/data/d3/cctv/image/hwy50at24th/hwy50at24th.jpg',
}
}

def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)

global_vars = self._search_regex(
r'<script[^<]+?([^<]+\.m3u8[^<]+)</script>',
webpage, 'Global Vars')
route_place = self._search_regex(r'routePlace\s*=\s*"([^"]+)"', global_vars, 'Route Place', fatal=False)
location_name = self._search_regex(r'locationName\s*=\s*"([^"]+)"', global_vars, 'Location Name', fatal=False)
poster_url = self._search_regex(r'posterURL\s*=\s*"([^"]+)"', global_vars, 'Poster Url', fatal=False)
video_stream = self._search_regex(r'videoStreamURL\s*=\s*"([^"]+)"', global_vars, 'Video Stream URL', fatal=False)

formats = self._extract_m3u8_formats(video_stream, video_id, 'ts', live=True)
self._sort_formats(formats)

return {
'id': video_id,
'title': f'{route_place} : {location_name}',
'is_live': True,
'formats': formats,
'thumbnail': poster_url,
}
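The extractor above scrapes plain var = "value" assignments out of an inline script block. The same scraping outside the extractor framework, with a trimmed stand-in for the page's script (the stream URL is a placeholder):

import re

script = '''
var routePlace = "US-50 : Sacramento";
var locationName = "Hwy 50 at 24th";
var videoStreamURL = "https://example.invalid/stream.m3u8";
'''

def grab(name):
    # Same pattern the extractor uses: name = "value"
    match = re.search(r'%s\s*=\s*"([^"]+)"' % name, script)
    return match.group(1) if match else None

print('%s : %s' % (grab('routePlace'), grab('locationName')))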
@@ -13,6 +13,8 @@ class CAM4IE(InfoExtractor):
'ext': 'mp4',
'title': 're:^foxynesss [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'age_limit': 18,
'live_status': 'is_live',
'thumbnail': 'https://snapshots.xcdnpro.com/thumbnails/foxynesss',
}
}

@@ -29,4 +31,5 @@ class CAM4IE(InfoExtractor):
'is_live': True,
'age_limit': 18,
'formats': formats,
'thumbnail': f'https://snapshots.xcdnpro.com/thumbnails/{channel_id}',
}
@@ -1,17 +1,14 @@
# coding: utf-8
from __future__ import unicode_literals

import calendar
import datetime

from .common import InfoExtractor
from ..utils import (
clean_html,
extract_timezone,
int_or_none,
parse_duration,
parse_resolution,
try_get,
unified_timestamp,
url_or_none,
)

@@ -95,14 +92,8 @@ class CCMAIE(InfoExtractor):
duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text'))
tematica = try_get(informacio, lambda x: x['tematica']['text'])

timestamp = None
data_utc = try_get(informacio, lambda x: x['data_emissio']['utc'])
try:
timezone, data_utc = extract_timezone(data_utc)
timestamp = calendar.timegm((datetime.datetime.strptime(
data_utc, '%Y-%d-%mT%H:%M:%S') - timezone).timetuple())
except TypeError:
pass
timestamp = unified_timestamp(data_utc)

subtitles = {}
subtitols = media.get('subtitols') or []
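The replacement works because unified_timestamp already parses ISO-8601 strings and applies any embedded UTC offset, which is what the removed strptime/timegm code was doing by hand. For instance:

from yt_dlp.utils import unified_timestamp

# Offset-aware ISO-8601 string -> epoch seconds (UTC)
print(unified_timestamp('2020-05-01T10:00:00+02:00'))  # 1588320000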
@@ -162,7 +162,8 @@ class CCTVIE(InfoExtractor):
'url': video_url,
'format_id': 'http',
'quality': quality,
'source_preference': -10
# Sample clip
'preference': -10
})

hls_url = try_get(data, lambda x: x['hls_url'], compat_str)
@@ -75,6 +75,7 @@ from ..utils import (
str_to_int,
strip_or_none,
traverse_obj,
try_get,
unescapeHTML,
UnsupportedError,
unified_strdate,
@@ -225,6 +226,7 @@ class InfoExtractor(object):

The following fields are optional:

direct: True if a direct video file was given (must only be set by GenericIE)
alt_title: A secondary title of the video.
display_id An alternative identifier for the video, not necessarily
unique, but available before title. Typically, id is
@@ -239,6 +241,7 @@ class InfoExtractor(object):
* "resolution" (optional, string "{width}x{height}",
deprecated)
* "filesize" (optional, int)
* "http_headers" (dict) - HTTP headers for the request
thumbnail: Full URL to a video thumbnail image.
description: Full video description.
uploader: Full name of the video uploader.
@@ -272,6 +275,8 @@ class InfoExtractor(object):
* "url": A URL pointing to the subtitles file
It can optionally also have:
* "name": Name or description of the subtitles
* "http_headers": A dictionary of additional HTTP headers
to add to the request.
"ext" will be calculated from URL if missing
automatic_captions: Like 'subtitles'; contains automatically generated
captions instead of normal subtitles
@@ -421,8 +426,8 @@ class InfoExtractor(object):
title, description etc.


Subclasses of this one should re-define the _real_initialize() and
_real_extract() methods and define a _VALID_URL regexp.
Subclasses of this should define a _VALID_URL regexp and, re-define the
_real_extract() and (optionally) _real_initialize() methods.
Probably, they should also be added to the list of extractors.

Subclasses may also override suitable() if necessary, but ensure the function
@@ -635,7 +640,7 @@ class InfoExtractor(object):
}
if hasattr(e, 'countries'):
kwargs['countries'] = e.countries
raise type(e)(e.msg, **kwargs)
raise type(e)(e.orig_msg, **kwargs)
except compat_http_client.IncompleteRead as e:
raise ExtractorError('A network error has occurred.', cause=e, expected=True, video_id=self.get_temp_id(url))
except (KeyError, StopIteration) as e:
@@ -657,7 +662,7 @@ class InfoExtractor(object):
return False

def set_downloader(self, downloader):
"""Sets the downloader for this IE."""
"""Sets a YoutubeDL instance as the downloader for this IE."""
self._downloader = downloader

def _real_initialize(self):
@@ -666,7 +671,7 @@ class InfoExtractor(object):

def _real_extract(self, url):
"""Real extraction process. Redefine in subclasses."""
pass
raise NotImplementedError('This method must be implemented by subclasses')

@classmethod
def ie_key(cls):
@@ -745,7 +750,7 @@ class InfoExtractor(object):

errmsg = '%s: %s' % (errnote, error_to_compat_str(err))
if fatal:
raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
raise ExtractorError(errmsg, cause=err)
else:
self.report_warning(errmsg)
return False
@@ -1097,6 +1102,7 @@ class InfoExtractor(object):
if metadata_available and (
self.get_param('ignore_no_formats_error') or self.get_param('wait_for_video')):
self.report_warning(msg)
return
if method is not None:
msg = '%s. %s' % (msg, self._LOGIN_HINTS[method])
raise ExtractorError(msg, expected=True)
@@ -1135,8 +1141,8 @@ class InfoExtractor(object):
'url': url,
}

def playlist_from_matches(self, matches, playlist_id=None, playlist_title=None, getter=None, ie=None, **kwargs):
urls = (self.url_result(self._proto_relative_url(m), ie)
def playlist_from_matches(self, matches, playlist_id=None, playlist_title=None, getter=None, ie=None, video_kwargs=None, **kwargs):
urls = (self.url_result(self._proto_relative_url(m), ie, **(video_kwargs or {}))
for m in orderedSet(map(getter, matches) if getter else matches))
return self.playlist_result(urls, playlist_id, playlist_title, **kwargs)

@@ -1291,6 +1297,7 @@ class InfoExtractor(object):
return self._og_search_property('description', html, fatal=False, **kargs)

def _og_search_title(self, html, **kargs):
kargs.setdefault('fatal', False)
return self._og_search_property('title', html, **kargs)

def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
@@ -1302,6 +1309,10 @@ class InfoExtractor(object):
def _og_search_url(self, html, **kargs):
return self._og_search_property('url', html, **kargs)

def _html_extract_title(self, html, name, **kwargs):
return self._html_search_regex(
r'(?s)<title>(.*?)</title>', html, name, **kwargs)

def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
name = variadic(name)
if display_name is None:
@@ -1447,7 +1458,7 @@ class InfoExtractor(object):
'title': part.get('name'),
'start_time': part.get('startOffset'),
'end_time': part.get('endOffset'),
} for part in e.get('hasPart', []) if part.get('@type') == 'Clip']
} for part in variadic(e.get('hasPart') or []) if part.get('@type') == 'Clip']
for idx, (last_c, current_c, next_c) in enumerate(zip(
[{'end_time': 0}] + chapters, chapters, chapters[1:])):
current_c['end_time'] = current_c['end_time'] or next_c['start_time']
@@ -1528,6 +1539,8 @@ class InfoExtractor(object):
'title': unescapeHTML(e.get('headline')),
'description': unescapeHTML(e.get('articleBody') or e.get('description')),
})
if traverse_obj(e, ('video', 0, '@type')) == 'VideoObject':
extract_video_object(e['video'][0])
elif item_type == 'VideoObject':
extract_video_object(e)
if expected_type is None:
@@ -1606,7 +1619,7 @@ class InfoExtractor(object):
'vcodec': {'type': 'ordered', 'regex': True,
'order': ['av0?1', 'vp0?9.2', 'vp0?9', '[hx]265|he?vc?', '[hx]264|avc', 'vp0?8', 'mp4v|h263', 'theora', '', None, 'none']},
'acodec': {'type': 'ordered', 'regex': True,
'order': ['[af]lac', 'wav|aiff', 'opus', 'vorbis', 'aac', 'mp?4a?', 'mp3', 'e-?a?c-?3', 'ac-?3', 'dts', '', None, 'none']},
'order': ['[af]lac', 'wav|aiff', 'opus', 'vorbis|ogg', 'aac', 'mp?4a?', 'mp3', 'e-?a?c-?3', 'ac-?3', 'dts', '', None, 'none']},
'hdr': {'type': 'ordered', 'regex': True, 'field': 'dynamic_range',
'order': ['dv', '(hdr)?12', r'(hdr)?10\+', '(hdr)?10', 'hlg', '', 'sdr', None]},
'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol',
@@ -1649,31 +1662,31 @@ class InfoExtractor(object):
'format_id': {'type': 'alias', 'field': 'id'},
'preference': {'type': 'alias', 'field': 'ie_pref'},
'language_preference': {'type': 'alias', 'field': 'lang'},
'source_preference': {'type': 'alias', 'field': 'source'},
'protocol': {'type': 'alias', 'field': 'proto'},
'filesize_approx': {'type': 'alias', 'field': 'fs_approx'},

# Deprecated
'dimension': {'type': 'alias', 'field': 'res'},
'resolution': {'type': 'alias', 'field': 'res'},
'extension': {'type': 'alias', 'field': 'ext'},
'bitrate': {'type': 'alias', 'field': 'br'},
'total_bitrate': {'type': 'alias', 'field': 'tbr'},
'video_bitrate': {'type': 'alias', 'field': 'vbr'},
'audio_bitrate': {'type': 'alias', 'field': 'abr'},
'framerate': {'type': 'alias', 'field': 'fps'},
'protocol': {'type': 'alias', 'field': 'proto'},
'source_preference': {'type': 'alias', 'field': 'source'},
'filesize_approx': {'type': 'alias', 'field': 'fs_approx'},
'filesize_estimate': {'type': 'alias', 'field': 'size'},
'samplerate': {'type': 'alias', 'field': 'asr'},
'video_ext': {'type': 'alias', 'field': 'vext'},
'audio_ext': {'type': 'alias', 'field': 'aext'},
'video_codec': {'type': 'alias', 'field': 'vcodec'},
'audio_codec': {'type': 'alias', 'field': 'acodec'},
'video': {'type': 'alias', 'field': 'hasvid'},
'has_video': {'type': 'alias', 'field': 'hasvid'},
'audio': {'type': 'alias', 'field': 'hasaud'},
'has_audio': {'type': 'alias', 'field': 'hasaud'},
'extractor': {'type': 'alias', 'field': 'ie_pref'},
'extractor_preference': {'type': 'alias', 'field': 'ie_pref'},
'dimension': {'type': 'alias', 'field': 'res', 'deprecated': True},
'resolution': {'type': 'alias', 'field': 'res', 'deprecated': True},
'extension': {'type': 'alias', 'field': 'ext', 'deprecated': True},
'bitrate': {'type': 'alias', 'field': 'br', 'deprecated': True},
'total_bitrate': {'type': 'alias', 'field': 'tbr', 'deprecated': True},
'video_bitrate': {'type': 'alias', 'field': 'vbr', 'deprecated': True},
'audio_bitrate': {'type': 'alias', 'field': 'abr', 'deprecated': True},
'framerate': {'type': 'alias', 'field': 'fps', 'deprecated': True},
'filesize_estimate': {'type': 'alias', 'field': 'size', 'deprecated': True},
'samplerate': {'type': 'alias', 'field': 'asr', 'deprecated': True},
'video_ext': {'type': 'alias', 'field': 'vext', 'deprecated': True},
'audio_ext': {'type': 'alias', 'field': 'aext', 'deprecated': True},
'video_codec': {'type': 'alias', 'field': 'vcodec', 'deprecated': True},
'audio_codec': {'type': 'alias', 'field': 'acodec', 'deprecated': True},
'video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
'has_video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
'audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
'has_audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
'extractor': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
'extractor_preference': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
}

def __init__(self, ie, field_preference):
@@ -1773,7 +1786,7 @@ class InfoExtractor(object):
continue
if self._get_field_setting(field, 'type') == 'alias':
alias, field = field, self._get_field_setting(field, 'field')
if alias not in ('format_id', 'preference', 'language_preference'):
if self._get_field_setting(alias, 'deprecated'):
self.ydl.deprecation_warning(
f'Format sorting alias {alias} is deprecated '
f'and may be removed in a future version. Please use {field} instead')
@@ -2872,7 +2885,8 @@ class InfoExtractor(object):
segment_duration = None
if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
representation_ms_info['total_number'] = int(math.ceil(
float_or_none(period_duration, segment_duration, default=0)))
representation_ms_info['fragments'] = [{
media_location_key: media_template % {
'Number': segment_number,
@@ -2963,6 +2977,10 @@ class InfoExtractor(object):
f['url'] = initialization_url
f['fragments'].append({location_key(initialization_url): initialization_url})
f['fragments'].extend(representation_ms_info['fragments'])
if not period_duration:
period_duration = try_get(
representation_ms_info,
lambda r: sum(frag['duration'] for frag in r['fragments']), float)
else:
# Assuming direct URL to unfragmented media.
f['url'] = base_url
@@ -3105,7 +3123,7 @@ class InfoExtractor(object):
})
return formats, subtitles

def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None, quality=None):
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8_native', mpd_id=None, preference=None, quality=None):
def absolute_url(item_url):
return urljoin(base_url, item_url)

@@ -3662,7 +3680,7 @@ class InfoExtractor(object):
def mark_watched(self, *args, **kwargs):
if not self.get_param('mark_watched', False):
return
if (self._get_login_info()[0] is not None
if (hasattr(self, '_NETRC_MACHINE') and self._get_login_info()[0] is not None
or self.get_param('cookiefile')
or self.get_param('cookiesfrombrowser')):
self._mark_watched(*args, **kwargs)
@@ -3710,6 +3728,22 @@ class InfoExtractor(object):
return [] if default is NO_DEFAULT else default
return list(val) if casesense else [x.lower() for x in val]

def _yes_playlist(self, playlist_id, video_id, smuggled_data=None, *, playlist_label='playlist', video_label='video'):
if not playlist_id or not video_id:
return not video_id

no_playlist = (smuggled_data or {}).get('force_noplaylist')
if no_playlist is not None:
return not no_playlist

video_id = '' if video_id is True else f' {video_id}'
playlist_id = '' if playlist_id is True else f' {playlist_id}'
if self.get_param('noplaylist'):
self.to_screen(f'Downloading just the {video_label}{video_id} because of --no-playlist')
return False
self.to_screen(f'Downloading {playlist_label}{playlist_id} - add --no-playlist to download just the {video_label}{video_id}')
return True


class SearchInfoExtractor(InfoExtractor):
"""
yt_dlp/extractor/cpac.py (new file, 148 lines)
@@ -0,0 +1,148 @@
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
str_or_none,
try_get,
unified_timestamp,
update_url_query,
urljoin,
)

# compat_range
try:
if callable(xrange):
range = xrange
except (NameError, TypeError):
pass


class CPACIE(InfoExtractor):
IE_NAME = 'cpac'
_VALID_URL = r'https?://(?:www\.)?cpac\.ca/(?P<fr>l-)?episode\?id=(?P<id>[\da-f]{8}(?:-[\da-f]{4}){3}-[\da-f]{12})'
_TEST = {
# 'url': 'http://www.cpac.ca/en/programs/primetime-politics/episodes/65490909',
'url': 'https://www.cpac.ca/episode?id=fc7edcae-4660-47e1-ba61-5b7f29a9db0f',
'md5': 'e46ad699caafd7aa6024279f2614e8fa',
'info_dict': {
'id': 'fc7edcae-4660-47e1-ba61-5b7f29a9db0f',
'ext': 'mp4',
'upload_date': '20220215',
'title': 'News Conference to Celebrate National Kindness Week – February 15, 2022',
'description': 'md5:466a206abd21f3a6f776cdef290c23fb',
'timestamp': 1644901200,
},
'params': {
'format': 'bestvideo',
'hls_prefer_native': True,
},
}

def _real_extract(self, url):
video_id = self._match_id(url)
url_lang = 'fr' if '/l-episode?' in url else 'en'

content = self._download_json(
'https://www.cpac.ca/api/1/services/contentModel.json?url=/site/website/episode/index.xml&crafterSite=cpacca&id=' + video_id,
video_id)
video_url = try_get(content, lambda x: x['page']['details']['videoUrl'], compat_str)
formats = []
if video_url:
content = content['page']
title = str_or_none(content['details']['title_%s_t' % (url_lang, )])
formats = self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', ext='mp4')
for fmt in formats:
# prefer language to match URL
fmt_lang = fmt.get('language')
if fmt_lang == url_lang:
fmt['language_preference'] = 10
elif not fmt_lang:
fmt['language_preference'] = -1
else:
fmt['language_preference'] = -10

self._sort_formats(formats)

category = str_or_none(content['details']['category_%s_t' % (url_lang, )])

def is_live(v_type):
return (v_type == 'live') if v_type is not None else None

return {
'id': video_id,
'formats': formats,
'title': title,
'description': str_or_none(content['details'].get('description_%s_t' % (url_lang, ))),
'timestamp': unified_timestamp(content['details'].get('liveDateTime')),
'category': [category] if category else None,
'thumbnail': urljoin(url, str_or_none(content['details'].get('image_%s_s' % (url_lang, )))),
'is_live': is_live(content['details'].get('type')),
}


class CPACPlaylistIE(InfoExtractor):
IE_NAME = 'cpac:playlist'
_VALID_URL = r'(?i)https?://(?:www\.)?cpac\.ca/(?:program|search|(?P<fr>emission|rechercher))\?(?:[^&]+&)*?(?P<id>(?:id=\d+|programId=\d+|key=[^&]+))'

_TESTS = [{
'url': 'https://www.cpac.ca/program?id=6',
'info_dict': {
'id': 'id=6',
'title': 'Headline Politics',
'description': 'Watch CPAC’s signature long-form coverage of the day’s pressing political events as they unfold.',
},
'playlist_count': 10,
}, {
'url': 'https://www.cpac.ca/search?key=hudson&type=all&order=desc',
'info_dict': {
'id': 'key=hudson',
'title': 'hudson',
},
'playlist_count': 22,
}, {
'url': 'https://www.cpac.ca/search?programId=50',
'info_dict': {
'id': 'programId=50',
'title': '50',
},
'playlist_count': 9,
}, {
'url': 'https://www.cpac.ca/emission?id=6',
'only_matching': True,
}, {
'url': 'https://www.cpac.ca/rechercher?key=hudson&type=all&order=desc',
'only_matching': True,
}]

def _real_extract(self, url):
video_id = self._match_id(url)
url_lang = 'fr' if any(x in url for x in ('/emission?', '/rechercher?')) else 'en'
pl_type, list_type = ('program', 'itemList') if any(x in url for x in ('/program?', '/emission?')) else ('search', 'searchResult')
api_url = (
'https://www.cpac.ca/api/1/services/contentModel.json?url=/site/website/%s/index.xml&crafterSite=cpacca&%s'
% (pl_type, video_id, ))
content = self._download_json(api_url, video_id)
entries = []
total_pages = int_or_none(try_get(content, lambda x: x['page'][list_type]['totalPages']), default=1)
for page in range(1, total_pages + 1):
if page > 1:
api_url = update_url_query(api_url, {'page': '%d' % (page, ), })
content = self._download_json(
api_url, video_id,
note='Downloading continuation - %d' % (page, ),
fatal=False)

for item in try_get(content, lambda x: x['page'][list_type]['item'], list) or []:
episode_url = urljoin(url, try_get(item, lambda x: x['url_%s_s' % (url_lang, )]))
if episode_url:
entries.append(episode_url)

return self.playlist_result(
(self.url_result(entry) for entry in entries),
playlist_id=video_id,
playlist_title=try_get(content, lambda x: x['page']['program']['title_%s_t' % (url_lang, )]) or video_id.split('=')[-1],
playlist_description=try_get(content, lambda x: x['page']['program']['description_%s_t' % (url_lang, )]),
)
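The playlist extractor pages through the API by re-stamping a page query parameter onto the same URL. The core loop pattern in isolation; fetch is a stand-in for _download_json:

from yt_dlp.utils import update_url_query

def fetch_all_pages(api_url, total_pages, fetch):
    pages = [fetch(api_url)]  # page 1 needs no explicit parameter
    for page in range(2, total_pages + 1):
        pages.append(fetch(update_url_query(api_url, {'page': '%d' % page})))
    return pages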
@@ -1,6 +1,7 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import re
|
||||
import json
|
||||
import zlib
|
||||
@@ -23,15 +24,17 @@ from ..utils import (
|
||||
bytes_to_intlist,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
format_field,
|
||||
intlist_to_bytes,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
lowercase_escape,
|
||||
merge_dicts,
|
||||
qualities,
|
||||
remove_end,
|
||||
sanitized_Request,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
)
|
||||
from ..aes import (
|
||||
@@ -40,8 +43,8 @@ from ..aes import (
|
||||
|
||||
|
||||
class CrunchyrollBaseIE(InfoExtractor):
|
||||
_LOGIN_URL = 'https://www.crunchyroll.com/login'
|
||||
_LOGIN_FORM = 'login_form'
|
||||
_LOGIN_URL = 'https://www.crunchyroll.com/welcome/login'
|
||||
_API_BASE = 'https://api.crunchyroll.com'
|
||||
_NETRC_MACHINE = 'crunchyroll'
|
||||
|
||||
def _call_rpc_api(self, method, video_id, note=None, data=None):
|
||||
@@ -58,50 +61,33 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
login_page = self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Downloading login page')
|
||||
|
||||
def is_logged(webpage):
|
||||
return 'href="/logout"' in webpage
|
||||
|
||||
# Already logged in
|
||||
if is_logged(login_page):
|
||||
if self._get_cookies(self._LOGIN_URL).get('etp_rt'):
|
||||
return
|
||||
|
||||
login_form_str = self._search_regex(
|
||||
r'(?P<form><form[^>]+?id=(["\'])%s\2[^>]*>)' % self._LOGIN_FORM,
|
||||
login_page, 'login form', group='form')
|
||||
upsell_response = self._download_json(
|
||||
f'{self._API_BASE}/get_upsell_data.0.json', None, 'Getting session id',
|
||||
query={
|
||||
'sess_id': 1,
|
||||
'device_id': 'whatvalueshouldbeforweb',
|
||||
'device_type': 'com.crunchyroll.static',
|
||||
'access_token': 'giKq5eY27ny3cqz',
|
||||
'referer': self._LOGIN_URL
|
||||
})
|
||||
if upsell_response['code'] != 'ok':
|
||||
raise ExtractorError('Could not get session id')
|
||||
session_id = upsell_response['data']['session_id']
|
||||
|
||||
post_url = extract_attributes(login_form_str).get('action')
|
||||
if not post_url:
|
||||
post_url = self._LOGIN_URL
|
||||
elif not post_url.startswith('http'):
|
||||
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
|
||||
|
||||
login_form = self._form_hidden_inputs(self._LOGIN_FORM, login_page)
|
||||
|
||||
login_form.update({
|
||||
'login_form[name]': username,
|
||||
'login_form[password]': password,
|
||||
})
|
||||
|
||||
response = self._download_webpage(
|
||||
post_url, None, 'Logging in', 'Wrong login info',
|
||||
data=urlencode_postdata(login_form),
|
||||
headers={'Content-Type': 'application/x-www-form-urlencoded'})
|
||||
|
||||
# Successful login
|
||||
if is_logged(response):
|
||||
return
|
||||
|
||||
error = self._html_search_regex(
|
||||
'(?s)<ul[^>]+class=["\']messages["\'][^>]*>(.+?)</ul>',
|
||||
response, 'error message', default=None)
|
||||
if error:
|
||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||
|
||||
raise ExtractorError('Unable to log in')
|
||||
login_response = self._download_json(
|
||||
f'{self._API_BASE}/login.1.json', None, 'Logging in',
|
||||
data=compat_urllib_parse_urlencode({
|
||||
'account': username,
|
||||
'password': password,
|
||||
'session_id': session_id
|
||||
}).encode('ascii'))
|
||||
if login_response['code'] != 'ok':
|
||||
            raise ExtractorError('Login failed. Server message: %s' % login_response['message'], expected=True)
        if not self._get_cookies(self._LOGIN_URL).get('etp_rt'):
            raise ExtractorError('Login succeeded but did not set etp_rt cookie')

    def _real_initialize(self):
        self._login()

@@ -733,13 +719,118 @@ class CrunchyrollBetaIE(CrunchyrollBaseIE):

    def _real_extract(self, url):
        lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'internal_id', 'id')
        webpage = self._download_webpage(url, display_id)
        episode_data = self._parse_json(
            self._search_regex(r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'episode data'),
            display_id)['content']['byId'][internal_id]
        video_id = episode_data['external_id'].split('.')[1]
        series_id = episode_data['episode_metadata']['series_slug_title']
        return self.url_result(f'https://www.crunchyroll.com/{lang}{series_id}/{display_id}-{video_id}',
                               CrunchyrollIE.ie_key(), video_id)
        initial_state = self._parse_json(
            self._search_regex(r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'initial state'),
            display_id)
        episode_data = initial_state['content']['byId'][internal_id]
        if not self._get_cookies(url).get('etp_rt'):
            video_id = episode_data['external_id'].split('.')[1]
            series_id = episode_data['episode_metadata']['series_slug_title']
            return self.url_result(f'https://www.crunchyroll.com/{lang}{series_id}/{display_id}-{video_id}',
                                   CrunchyrollIE.ie_key(), video_id)

        app_config = self._parse_json(
            self._search_regex(r'__APP_CONFIG__\s*=\s*({.+?})\s*;', webpage, 'app config'),
            display_id)
        client_id = app_config['cxApiParams']['accountAuthClientId']
        api_domain = app_config['cxApiParams']['apiDomain']
        basic_token = str(base64.b64encode(('%s:' % client_id).encode('ascii')), 'ascii')
        auth_response = self._download_json(
            f'{api_domain}/auth/v1/token', display_id,
            note='Authenticating with cookie',
            headers={
                'Authorization': 'Basic ' + basic_token
            }, data='grant_type=etp_rt_cookie'.encode('ascii'))
        policy_response = self._download_json(
            f'{api_domain}/index/v2', display_id,
            note='Retrieving signed policy',
            headers={
                'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']
            })
        bucket = policy_response['cms']['bucket']
        params = {
            'Policy': policy_response['cms']['policy'],
            'Signature': policy_response['cms']['signature'],
            'Key-Pair-Id': policy_response['cms']['key_pair_id']
        }
        locale = traverse_obj(initial_state, ('localization', 'locale'))
        if locale:
            params['locale'] = locale
        episode_response = self._download_json(
            f'{api_domain}/cms/v2{bucket}/episodes/{internal_id}', display_id,
            note='Retrieving episode metadata',
            query=params)
        if episode_response.get('is_premium_only') and not episode_response.get('playback'):
            raise ExtractorError('This video is for premium members only.', expected=True)
        stream_response = self._download_json(
            episode_response['playback'], display_id,
            note='Retrieving stream info')

        thumbnails = []
        for thumbnails_data in traverse_obj(episode_response, ('images', 'thumbnail')):
            for thumbnail_data in thumbnails_data:
                thumbnails.append({
                    'url': thumbnail_data.get('source'),
                    'width': thumbnail_data.get('width'),
                    'height': thumbnail_data.get('height'),
                })
        subtitles = {}
        for lang, subtitle_data in stream_response.get('subtitles').items():
            subtitles[lang] = [{
                'url': subtitle_data.get('url'),
                'ext': subtitle_data.get('format')
            }]

        requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])]
        hardsub_preference = qualities(requested_hardsubs[::-1])
        requested_formats = self._configuration_arg('format') or ['adaptive_hls']

        formats = []
        for stream_type, streams in stream_response.get('streams', {}).items():
            if stream_type not in requested_formats:
                continue
            for stream in streams.values():
                hardsub_lang = stream.get('hardsub_locale') or ''
                if hardsub_lang.lower() not in requested_hardsubs:
                    continue
                format_id = join_nonempty(
                    stream_type,
                    format_field(stream, 'hardsub_locale', 'hardsub-%s'))
                if not stream.get('url'):
                    continue
                if stream_type.split('_')[-1] == 'hls':
                    adaptive_formats = self._extract_m3u8_formats(
                        stream['url'], display_id, 'mp4', m3u8_id=format_id,
                        note='Downloading %s information' % format_id,
                        fatal=False)
                elif stream_type.split('_')[-1] == 'dash':
                    adaptive_formats = self._extract_mpd_formats(
                        stream['url'], display_id, mpd_id=format_id,
                        note='Downloading %s information' % format_id,
                        fatal=False)
                for f in adaptive_formats:
                    if f.get('acodec') != 'none':
                        f['language'] = stream_response.get('audio_locale')
                    f['quality'] = hardsub_preference(hardsub_lang.lower())
                formats.extend(adaptive_formats)
        self._sort_formats(formats)

        return {
            'id': internal_id,
            'title': '%s Episode %s – %s' % (episode_response.get('season_title'), episode_response.get('episode'), episode_response.get('title')),
            'description': episode_response.get('description').replace(r'\r\n', '\n'),
            'duration': float_or_none(episode_response.get('duration_ms'), 1000),
            'thumbnails': thumbnails,
            'series': episode_response.get('series_title'),
            'series_id': episode_response.get('series_id'),
            'season': episode_response.get('season_title'),
            'season_id': episode_response.get('season_id'),
            'season_number': episode_response.get('season_number'),
            'episode': episode_response.get('title'),
            'episode_number': episode_response.get('sequence_number'),
            'subtitles': subtitles,
            'formats': formats
        }
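The token request above is an OAuth-style client-credentials exchange: the client id from `__APP_CONFIG__` is sent as HTTP Basic credentials with an empty secret, while the `etp_rt` cookie rides along in the cookie jar. A minimal sketch of just the header construction, with a placeholder `client_id` (the real value comes from the page):

    import base64

    client_id = 'example_client_id'  # hypothetical; read from __APP_CONFIG__ in practice

    # Basic credentials are 'client_id:' (note the trailing colon, empty secret)
    basic_token = base64.b64encode(('%s:' % client_id).encode('ascii')).decode('ascii')
    headers = {'Authorization': 'Basic ' + basic_token}
    data = b'grant_type=etp_rt_cookie'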

class CrunchyrollBetaShowIE(CrunchyrollBaseIE):
@@ -3,6 +3,7 @@ from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import compat_HTMLParseError
from ..utils import (
    determine_ext,
    ExtractorError,
@@ -11,9 +12,11 @@ from ..utils import (
    get_element_by_attribute,
    get_element_by_class,
    int_or_none,
    join_nonempty,
    js_to_json,
    merge_dicts,
    parse_iso8601,
    parse_qs,
    smuggle_url,
    str_to_int,
    unescapeHTML,
@@ -126,8 +129,12 @@ class CSpanIE(InfoExtractor):
                ext = 'vtt'
            subtitle['ext'] = ext
        ld_info = self._search_json_ld(webpage, video_id, default={})
        title = get_element_by_class('video-page-title', webpage) or \
            self._og_search_title(webpage)
        try:
            title = get_element_by_class('video-page-title', webpage)
        except compat_HTMLParseError:
            title = None
        if title is None:
            title = self._og_search_title(webpage)
        description = get_element_by_attribute('itemprop', 'description', webpage) or \
            self._html_search_meta(['og:description', 'description'], webpage)
        return merge_dicts(info, ld_info, {
@@ -242,3 +249,42 @@ class CSpanIE(InfoExtractor):
            'title': title,
            'id': 'c' + video_id if video_type == 'clip' else video_id,
        }


class CSpanCongressIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?c-span\.org/congress/'
    _TESTS = [{
        'url': 'https://www.c-span.org/congress/?chamber=house&date=2017-12-13&t=1513208380',
        'info_dict': {
            'id': 'house_2017-12-13',
            'title': 'Congressional Chronicle - Members of Congress, Hearings and More',
            'description': 'md5:54c264b7a8f219937987610243305a84',
            'thumbnail': r're:https://ximage.c-spanvideo.org/.+',
            'ext': 'mp4'
        }
    }]

    def _real_extract(self, url):
        query = parse_qs(url)
        video_date = query.get('date', [None])[0]
        video_id = join_nonempty(query.get('chamber', ['senate'])[0], video_date, delim='_')
        webpage = self._download_webpage(url, video_id)
        if not video_date:
            jwp_date = re.search(r'jwsetup.clipprogdate = \'(?P<date>\d{4}-\d{2}-\d{2})\';', webpage)
            if jwp_date:
                video_id = f'{video_id}_{jwp_date.group("date")}'
        jwplayer_data = self._parse_json(
            self._search_regex(r'jwsetup\s*=\s*({(?:.|\n)[^;]+});', webpage, 'player config'),
            video_id, transform_source=js_to_json)

        title = (self._og_search_title(webpage, default=None)
                 or self._html_search_regex(r'(?s)<title>(.*?)</title>', webpage, 'video title'))
        description = (self._og_search_description(webpage, default=None)
                       or self._html_search_meta('description', webpage, 'description', default=None))

        return {
            **self._parse_jwplayer_data(jwplayer_data, video_id, False),
            'title': re.sub(r'\s+', ' ', title.split('|')[0]).strip(),
            'description': description,
            'http_headers': {'Referer': 'https://www.c-span.org/'},
        }
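The id construction above leans on yt-dlp's join_nonempty helper, which silently drops empty parts, so a missing ?date= simply shortens the id. A quick illustration of the behaviour assumed here:

    from yt_dlp.utils import join_nonempty

    join_nonempty('house', '2017-12-13', delim='_')  # -> 'house_2017-12-13'
    join_nonempty('senate', None, delim='_')         # -> 'senate' (missing date is dropped)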
@@ -207,12 +207,10 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
        video_id, playlist_id = self._match_valid_url(url).groups()

        if playlist_id:
            if not self.get_param('noplaylist'):
                self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id)
            if self._yes_playlist(playlist_id, video_id):
                return self.url_result(
                    'http://www.dailymotion.com/playlist/' + playlist_id,
                    'DailymotionPlaylist', playlist_id)
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

        password = self.get_param('videopassword')
        media = self._call_api(
@@ -261,9 +259,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
                continue
            if media_type == 'application/x-mpegURL':
                formats.extend(self._extract_m3u8_formats(
                    media_url, video_id, 'mp4',
                    'm3u8' if is_live else 'm3u8_native',
                    m3u8_id='hls', fatal=False))
                    media_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False))
            else:
                f = {
                    'url': media_url,
@@ -157,11 +157,8 @@ class DaumListIE(InfoExtractor):
        query_dict = parse_qs(url)
        if 'clipid' in query_dict:
            clip_id = query_dict['clipid'][0]
            if self.get_param('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % clip_id)
            if not self._yes_playlist(list_id, clip_id):
                return self.url_result(DaumClipIE._URL_TEMPLATE % clip_id, 'DaumClip')
            else:
                self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % list_id)


class DaumPlaylistIE(DaumListIE):
yt_dlp/extractor/daystar.py (new file, 48 lines)
@@ -0,0 +1,48 @@
from .common import InfoExtractor
from ..utils import js_to_json, urljoin


class DaystarClipIE(InfoExtractor):
    IE_NAME = 'daystar:clip'
    _VALID_URL = r'https?://player\.daystar\.tv/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://player.daystar.tv/0MTO2ITM',
        'info_dict': {
            'id': '0MTO2ITM',
            'ext': 'mp4',
            'title': 'The Dark World of COVID Pt. 1 | Aaron Siri',
            'description': 'a420d320dda734e5f29458df3606c5f4',
            'thumbnail': r're:^https?://.+\.jpg',
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        src_iframe = self._search_regex(r'\<iframe[^>]+src="([^"]+)"', webpage, 'src iframe')
        webpage_iframe = self._download_webpage(
            src_iframe.replace('player.php', 'config2.php'), video_id, headers={'Referer': src_iframe})

        sources = self._parse_json(self._search_regex(
            r'sources\:\s*(\[.*?\])', webpage_iframe, 'm3u8 source'), video_id, transform_source=js_to_json)

        formats, subtitles = [], {}
        for source in sources:
            file = source.get('file')
            if file and source.get('type') == 'm3u8':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    urljoin('https://www.lightcast.com/embed/', file),
                    video_id, 'mp4', fatal=False, headers={'Referer': src_iframe})
                formats.extend(fmts)
                subtitles = self._merge_subtitles(subtitles, subs)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': self._html_search_meta(['og:title', 'twitter:title'], webpage),
            'description': self._html_search_meta(['og:description', 'twitter:description'], webpage),
            'thumbnail': self._search_regex(r'image:\s*"([^"]+)', webpage_iframe, 'thumbnail'),
            'formats': formats,
            'subtitles': subtitles,
        }
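The config page here embeds a JWPlayer-style sources array as a JavaScript literal rather than strict JSON, which is why the extractor routes it through js_to_json before parsing. A small sketch of that step in isolation (the page snippet below is made up):

    import json
    import re

    from yt_dlp.utils import js_to_json

    page = "playlist: [{ sources: [{file: '/vods/abc/master.m3u8', type: 'm3u8'}] }]"  # hypothetical
    raw = re.search(r'sources:\s*(\[.*?\])', page).group(1)
    sources = json.loads(js_to_json(raw))
    # -> [{'file': '/vods/abc/master.m3u8', 'type': 'm3u8'}]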
@@ -20,6 +20,16 @@ class DoodStreamIE(InfoExtractor):
            'description': 'Kat Wonders - Monthly May 2020 | DoodStream.com',
            'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg',
        }
    }, {
        'url': 'http://dood.watch/d/5s1wmbdacezb',
        'md5': '4568b83b31e13242b3f1ff96c55f0595',
        'info_dict': {
            'id': '5s1wmbdacezb',
            'ext': 'mp4',
            'title': 'Kat Wonders - Monthly May 2020',
            'description': 'Kat Wonders - Monthly May 2020 | DoodStream.com',
            'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg',
        }
    }, {
        'url': 'https://dood.to/d/jzrxn12t2s7n',
        'md5': '3207e199426eca7c2aa23c2872e6728a',
@@ -34,31 +44,26 @@ class DoodStreamIE(InfoExtractor):

    def _real_extract(self, url):
        video_id = self._match_id(url)
        url = f'https://dood.to/e/{video_id}'
        webpage = self._download_webpage(url, video_id)

        if '/d/' in url:
            url = "https://dood.to" + self._html_search_regex(
                r'<iframe src="(/e/[a-z0-9]+)"', webpage, 'embed')
            video_id = self._match_id(url)
            webpage = self._download_webpage(url, video_id)

        title = self._html_search_meta(['og:title', 'twitter:title'],
                                       webpage, default=None)
        thumb = self._html_search_meta(['og:image', 'twitter:image'],
                                       webpage, default=None)
        title = self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None)
        thumb = self._html_search_meta(['og:image', 'twitter:image'], webpage, default=None)
        token = self._html_search_regex(r'[?&]token=([a-z0-9]+)[&\']', webpage, 'token')
        description = self._html_search_meta(
            ['og:description', 'description', 'twitter:description'],
            webpage, default=None)
        auth_url = 'https://dood.to' + self._html_search_regex(
            r'(/pass_md5.*?)\'', webpage, 'pass_md5')
            ['og:description', 'description', 'twitter:description'], webpage, default=None)

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/66.0',
            'referer': url
        }

        webpage = self._download_webpage(auth_url, video_id, headers=headers)
        final_url = webpage + ''.join([random.choice(string.ascii_letters + string.digits) for _ in range(10)]) + "?token=" + token + "&expiry=" + str(int(time.time() * 1000))
        pass_md5 = self._html_search_regex(r'(/pass_md5.*?)\'', webpage, 'pass_md5')
        final_url = ''.join((
            self._download_webpage(f'https://dood.to{pass_md5}', video_id, headers=headers),
            *(random.choice(string.ascii_letters + string.digits) for _ in range(10)),
            f'?token={token}&expiry={int(time.time() * 1000)}',
        ))

        return {
            'id': video_id,
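Either way the direct URL is assembled the same: the plain-text body of the /pass_md5 response, ten random alphanumeric characters, then a token and millisecond expiry. A standalone sketch of that assembly (md5_body and token are placeholder values):

    import random
    import string
    import time

    def build_final_url(md5_body, token):
        # md5_body: plain-text body returned by the /pass_md5/... endpoint
        pad = ''.join(random.choices(string.ascii_letters + string.digits, k=10))
        return f'{md5_body}{pad}?token={token}&expiry={int(time.time() * 1000)}'

    build_final_url('https://example.com/stream/', 'abc123')  # hypothetical values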
@@ -369,6 +369,222 @@ class DiscoveryPlusBaseIE(DPlayBaseIE):
        return self._get_disco_api_info(url, self._match_id(url), **self._DISCO_API_PARAMS)


class GoDiscoveryIE(DiscoveryPlusBaseIE):
    _VALID_URL = r'https?://(?:go\.)?discovery\.com/video' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://go.discovery.com/video/dirty-jobs-discovery-atve-us/rodbuster-galvanizer',
        'info_dict': {
            'id': '4164906',
            'display_id': 'dirty-jobs-discovery-atve-us/rodbuster-galvanizer',
            'ext': 'mp4',
            'title': 'Rodbuster / Galvanizer',
            'description': 'Mike installs rebar with a team of rodbusters, then he galvanizes steel.',
            'season_number': 9,
            'episode_number': 1,
        },
        'skip': 'Available for Premium users',
    }, {
        'url': 'https://discovery.com/video/dirty-jobs-discovery-atve-us/rodbuster-galvanizer',
        'only_matching': True,
    }]

    _PRODUCT = 'dsc'
    _DISCO_API_PARAMS = {
        'disco_host': 'us1-prod-direct.go.discovery.com',
        'realm': 'go',
        'country': 'us',
    }


class TravelChannelIE(DiscoveryPlusBaseIE):
    _VALID_URL = r'https?://(?:watch\.)?travelchannel\.com/video' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://watch.travelchannel.com/video/ghost-adventures-travel-channel/ghost-train-of-ely',
        'info_dict': {
            'id': '2220256',
            'display_id': 'ghost-adventures-travel-channel/ghost-train-of-ely',
            'ext': 'mp4',
            'title': 'Ghost Train of Ely',
            'description': 'The crew investigates the dark history of the Nevada Northern Railway.',
            'season_number': 24,
            'episode_number': 1,
        },
        'skip': 'Available for Premium users',
    }, {
        'url': 'https://watch.travelchannel.com/video/ghost-adventures-travel-channel/ghost-train-of-ely',
        'only_matching': True,
    }]

    _PRODUCT = 'trav'
    _DISCO_API_PARAMS = {
        'disco_host': 'us1-prod-direct.watch.travelchannel.com',
        'realm': 'go',
        'country': 'us',
    }


class CookingChannelIE(DiscoveryPlusBaseIE):
    _VALID_URL = r'https?://(?:watch\.)?cookingchanneltv\.com/video' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://watch.cookingchanneltv.com/video/carnival-eats-cooking-channel/the-postman-always-brings-rice-2348634',
        'info_dict': {
            'id': '2348634',
            'display_id': 'carnival-eats-cooking-channel/the-postman-always-brings-rice-2348634',
            'ext': 'mp4',
            'title': 'The Postman Always Brings Rice',
            'description': 'Noah visits the Maui Fair and the Aurora Winter Festival in Vancouver.',
            'season_number': 9,
            'episode_number': 1,
        },
        'skip': 'Available for Premium users',
    }, {
        'url': 'https://watch.cookingchanneltv.com/video/carnival-eats-cooking-channel/the-postman-always-brings-rice-2348634',
        'only_matching': True,
    }]

    _PRODUCT = 'cook'
    _DISCO_API_PARAMS = {
        'disco_host': 'us1-prod-direct.watch.cookingchanneltv.com',
        'realm': 'go',
        'country': 'us',
    }


class HGTVUsaIE(DiscoveryPlusBaseIE):
    _VALID_URL = r'https?://(?:watch\.)?hgtv\.com/video' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://watch.hgtv.com/video/home-inspector-joe-hgtv-atve-us/this-mold-house',
        'info_dict': {
            'id': '4289736',
            'display_id': 'home-inspector-joe-hgtv-atve-us/this-mold-house',
            'ext': 'mp4',
            'title': 'This Mold House',
            'description': "Joe and Noel help take a family's dream home from hazardous to fabulous.",
            'season_number': 1,
            'episode_number': 1,
        },
        'skip': 'Available for Premium users',
    }, {
        'url': 'https://watch.hgtv.com/video/home-inspector-joe-hgtv-atve-us/this-mold-house',
        'only_matching': True,
    }]

    _PRODUCT = 'hgtv'
    _DISCO_API_PARAMS = {
        'disco_host': 'us1-prod-direct.watch.hgtv.com',
        'realm': 'go',
        'country': 'us',
    }


class FoodNetworkIE(DiscoveryPlusBaseIE):
    _VALID_URL = r'https?://(?:watch\.)?foodnetwork\.com/video' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://watch.foodnetwork.com/video/kids-baking-championship-food-network/float-like-a-butterfly',
        'info_dict': {
            'id': '4116449',
            'display_id': 'kids-baking-championship-food-network/float-like-a-butterfly',
            'ext': 'mp4',
            'title': 'Float Like a Butterfly',
            'description': 'The 12 kid bakers create colorful carved butterfly cakes.',
            'season_number': 10,
            'episode_number': 1,
        },
        'skip': 'Available for Premium users',
    }, {
        'url': 'https://watch.foodnetwork.com/video/kids-baking-championship-food-network/float-like-a-butterfly',
        'only_matching': True,
    }]

    _PRODUCT = 'food'
    _DISCO_API_PARAMS = {
        'disco_host': 'us1-prod-direct.watch.foodnetwork.com',
        'realm': 'go',
        'country': 'us',
    }


class DestinationAmericaIE(DiscoveryPlusBaseIE):
    _VALID_URL = r'https?://(?:www\.)?destinationamerica\.com/video' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://www.destinationamerica.com/video/alaska-monsters-destination-america-atve-us/central-alaskas-bigfoot',
        'info_dict': {
            'id': '4210904',
            'display_id': 'alaska-monsters-destination-america-atve-us/central-alaskas-bigfoot',
            'ext': 'mp4',
            'title': "Central Alaska's Bigfoot",
            'description': 'A team heads to central Alaska to investigate an aggressive Bigfoot.',
            'season_number': 1,
            'episode_number': 1,
        },
        'skip': 'Available for Premium users',
    }, {
        'url': 'https://www.destinationamerica.com/video/alaska-monsters-destination-america-atve-us/central-alaskas-bigfoot',
        'only_matching': True,
    }]

    _PRODUCT = 'dam'
    _DISCO_API_PARAMS = {
        'disco_host': 'us1-prod-direct.destinationamerica.com',
        'realm': 'go',
        'country': 'us',
    }


class InvestigationDiscoveryIE(DiscoveryPlusBaseIE):
    _VALID_URL = r'https?://(?:www\.)?investigationdiscovery\.com/video' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://www.investigationdiscovery.com/video/unmasked-investigation-discovery/the-killer-clown',
        'info_dict': {
            'id': '2139409',
            'display_id': 'unmasked-investigation-discovery/the-killer-clown',
            'ext': 'mp4',
            'title': 'The Killer Clown',
            'description': 'A wealthy Florida woman is fatally shot in the face by a clown at her door.',
            'season_number': 1,
            'episode_number': 1,
        },
        'skip': 'Available for Premium users',
    }, {
        'url': 'https://www.investigationdiscovery.com/video/unmasked-investigation-discovery/the-killer-clown',
        'only_matching': True,
    }]

    _PRODUCT = 'ids'
    _DISCO_API_PARAMS = {
        'disco_host': 'us1-prod-direct.investigationdiscovery.com',
        'realm': 'go',
        'country': 'us',
    }


class AmHistoryChannelIE(DiscoveryPlusBaseIE):
    _VALID_URL = r'https?://(?:www\.)?ahctv\.com/video' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://www.ahctv.com/video/modern-sniper-ahc/army',
        'info_dict': {
            'id': '2309730',
            'display_id': 'modern-sniper-ahc/army',
            'ext': 'mp4',
            'title': 'Army',
            'description': "Snipers today face challenges their predecessors could've only dreamed of.",
            'season_number': 1,
            'episode_number': 1,
        },
        'skip': 'Available for Premium users',
    }, {
        'url': 'https://www.ahctv.com/video/modern-sniper-ahc/army',
        'only_matching': True,
    }]

    _PRODUCT = 'ahc'
    _DISCO_API_PARAMS = {
        'disco_host': 'us1-prod-direct.ahctv.com',
        'realm': 'go',
        'country': 'us',
    }


class ScienceChannelIE(DiscoveryPlusBaseIE):
    _VALID_URL = r'https?://(?:www\.)?sciencechannel\.com/video' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
@@ -383,6 +599,9 @@ class ScienceChannelIE(DiscoveryPlusBaseIE):
            'episode_number': 1,
        },
        'skip': 'Available for Premium users',
    }, {
        'url': 'https://www.sciencechannel.com/video/strangest-things-science-atve-us/nazi-mystery-machine',
        'only_matching': True,
    }]

    _PRODUCT = 'sci'
@@ -407,6 +626,9 @@ class DIYNetworkIE(DiscoveryPlusBaseIE):
            'episode_number': 2,
        },
        'skip': 'Available for Premium users',
    }, {
        'url': 'https://watch.diynetwork.com/video/pool-kings-diy-network/bringing-beach-life-to-texas',
        'only_matching': True,
    }]

    _PRODUCT = 'diy'
@@ -417,6 +639,33 @@ class DIYNetworkIE(DiscoveryPlusBaseIE):
    }


class DiscoveryLifeIE(DiscoveryPlusBaseIE):
    _VALID_URL = r'https?://(?:www\.)?discoverylife\.com/video' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://www.discoverylife.com/video/surviving-death-discovery-life-atve-us/bodily-trauma',
        'info_dict': {
            'id': '2218238',
            'display_id': 'surviving-death-discovery-life-atve-us/bodily-trauma',
            'ext': 'mp4',
            'title': 'Bodily Trauma',
            'description': 'Meet three people who tested the limits of the human body.',
            'season_number': 1,
            'episode_number': 2,
        },
        'skip': 'Available for Premium users',
    }, {
        'url': 'https://www.discoverylife.com/video/surviving-death-discovery-life-atve-us/bodily-trauma',
        'only_matching': True,
    }]

    _PRODUCT = 'dlf'
    _DISCO_API_PARAMS = {
        'disco_host': 'us1-prod-direct.discoverylife.com',
        'realm': 'go',
        'country': 'us',
    }


class AnimalPlanetIE(DiscoveryPlusBaseIE):
    _VALID_URL = r'https?://(?:www\.)?animalplanet\.com/video' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
@@ -431,6 +680,9 @@ class AnimalPlanetIE(DiscoveryPlusBaseIE):
            'episode_number': 11,
        },
        'skip': 'Available for Premium users',
    }, {
        'url': 'https://www.animalplanet.com/video/north-woods-law-animal-planet/squirrel-showdown',
        'only_matching': True,
    }]

    _PRODUCT = 'apl'
@@ -441,6 +693,33 @@ class AnimalPlanetIE(DiscoveryPlusBaseIE):
    }


class TLCIE(DiscoveryPlusBaseIE):
    _VALID_URL = r'https?://(?:go\.)?tlc\.com/video' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://go.tlc.com/video/my-600-lb-life-tlc/melissas-story-part-1',
        'info_dict': {
            'id': '2206540',
            'display_id': 'my-600-lb-life-tlc/melissas-story-part-1',
            'ext': 'mp4',
            'title': "Melissa's Story (Part 1)",
            'description': 'At 650 lbs, Melissa is ready to begin her seven-year weight loss journey.',
            'season_number': 1,
            'episode_number': 1,
        },
        'skip': 'Available for Premium users',
    }, {
        'url': 'https://go.tlc.com/video/my-600-lb-life-tlc/melissas-story-part-1',
        'only_matching': True,
    }]

    _PRODUCT = 'tlc'
    _DISCO_API_PARAMS = {
        'disco_host': 'us1-prod-direct.tlc.com',
        'realm': 'go',
        'country': 'us',
    }
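All of the new network extractors above follow one template: subclass DiscoveryPlusBaseIE and override only the URL pattern, the product name and the API host. A sketch of the pattern with hypothetical values, to make the shape explicit:

    # Hypothetical network; only these four attributes vary between the classes above.
    class ExampleNetworkIE(DiscoveryPlusBaseIE):
        _VALID_URL = r'https?://(?:www\.)?example\.com/video' + DPlayBaseIE._PATH_REGEX
        _PRODUCT = 'exmpl'
        _DISCO_API_PARAMS = {
            'disco_host': 'us1-prod-direct.example.com',
            'realm': 'go',
            'country': 'us',
        }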

class DiscoveryPlusIE(DiscoveryPlusBaseIE):
    _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?!it/)(?:\w{2}/)?video' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
@@ -56,8 +56,8 @@ class DropboxIE(InfoExtractor):
        else:
            raise ExtractorError('Password protected video, use --video-password <password>', expected=True)

        json_string = self._html_search_regex(r'InitReact\.mountComponent.+ "props":(.+), "elem_id"', webpage, 'Info JSON')
        info_json = self._parse_json(json_string, video_id)
        json_string = self._html_search_regex(r'InitReact\.mountComponent\(.*?,\s*(\{.+\})\s*?\)', webpage, 'Info JSON')
        info_json = self._parse_json(json_string, video_id).get('props')
        transcode_url = traverse_obj(info_json, ((None, 'preview'), 'file', 'preview', 'content', 'transcode_url'), get_all=False)
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id)
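The (None, 'preview') branch in that traverse_obj path tries the mapping itself first and falls back to its 'preview' key, while get_all=False returns the first hit rather than a list. A toy illustration of the lookup (the nested dict is made up):

    from yt_dlp.utils import traverse_obj

    # Hypothetical parsed props; only the nesting shape matters
    props = {'preview': {'file': {'preview': {'content': {'transcode_url': 'https://example.com/master.m3u8'}}}}}
    traverse_obj(props, ((None, 'preview'), 'file', 'preview', 'content', 'transcode_url'), get_all=False)
    # -> 'https://example.com/master.m3u8' (found via the 'preview' branch)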
@@ -7,13 +7,11 @@ import re


from .common import InfoExtractor
from ..aes import aes_cbc_decrypt
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
from ..compat import compat_urllib_parse_unquote
from ..utils import (
    bytes_to_intlist,
    ExtractorError,
    int_or_none,
    intlist_to_bytes,
    float_or_none,
    mimetype2ext,
    str_or_none,
@@ -191,13 +189,11 @@ class DRTVIE(InfoExtractor):
        def decrypt_uri(e):
            n = int(e[2:10], 16)
            a = e[10 + n:]
            data = bytes_to_intlist(hex_to_bytes(e[10:10 + n]))
            key = bytes_to_intlist(hashlib.sha256(
                ('%s:sRBzYNXBzkKgnjj8pGtkACch' % a).encode('utf-8')).digest())
            iv = bytes_to_intlist(hex_to_bytes(a))
            decrypted = aes_cbc_decrypt(data, key, iv)
            return intlist_to_bytes(
                decrypted[:-decrypted[-1]]).decode('utf-8').split('?')[0]
            data = hex_to_bytes(e[10:10 + n])
            key = hashlib.sha256(('%s:sRBzYNXBzkKgnjj8pGtkACch' % a).encode('utf-8')).digest()
            iv = hex_to_bytes(a)
            decrypted = unpad_pkcs7(aes_cbc_decrypt_bytes(data, key, iv))
            return decrypted.decode('utf-8').split('?')[0]

        for asset in assets:
            kind = asset.get('Kind')
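The refactor drops the intlist round-trips for the byte-oriented AES helpers; note that the old `decrypted[:-decrypted[-1]]` slice was hand-rolled PKCS#7 unpadding, which unpad_pkcs7 now does explicitly. A self-contained sketch of the new shape, using stdlib bytes.fromhex in place of the extractor's local hex_to_bytes helper:

    import hashlib

    from yt_dlp.aes import aes_cbc_decrypt_bytes, unpad_pkcs7

    def decrypt_uri(e):
        # e = 2-char header, 8 hex digits giving the payload length in hex chars,
        # then the encrypted payload (hex), then a trailing hex string that serves
        # as both the salt for the key and the IV
        n = int(e[2:10], 16)
        iv_hex = e[10 + n:]
        data = bytes.fromhex(e[10:10 + n])
        key = hashlib.sha256(('%s:sRBzYNXBzkKgnjj8pGtkACch' % iv_hex).encode('utf-8')).digest()
        decrypted = unpad_pkcs7(aes_cbc_decrypt_bytes(data, key, bytes.fromhex(iv_hex)))
        return decrypted.decode('utf-8').split('?')[0]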
@@ -7,16 +7,6 @@ class EngadgetIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?engadget\.com/video/(?P<id>[^/?#]+)'

    _TESTS = [{
        # video with 5min ID
        'url': 'http://www.engadget.com/video/518153925/',
        'md5': 'c6820d4828a5064447a4d9fc73f312c9',
        'info_dict': {
            'id': '518153925',
            'ext': 'mp4',
            'title': 'Samsung Galaxy Tab Pro 8.4 Review',
        },
        'add_ie': ['FiveMin'],
    }, {
        # video with vidible ID
        'url': 'https://www.engadget.com/video/57a28462134aa15a39f0421a/',
        'only_matching': True,
yt_dlp/extractor/ertgr.py (new file, 316 lines)
@@ -0,0 +1,316 @@
# coding: utf-8
from __future__ import unicode_literals

import json
import re

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    clean_html,
    determine_ext,
    ExtractorError,
    dict_get,
    int_or_none,
    merge_dicts,
    parse_qs,
    parse_age_limit,
    parse_iso8601,
    str_or_none,
    try_get,
    unescapeHTML,
    url_or_none,
    variadic,
)


class ERTFlixBaseIE(InfoExtractor):
    def _call_api(
            self, video_id, method='Player/AcquireContent', api_version=1,
            param_headers=None, data=None, headers=None, **params):
        platform_codename = {'platformCodename': 'www'}
        headers_as_param = {'X-Api-Date-Format': 'iso', 'X-Api-Camel-Case': False}
        headers_as_param.update(param_headers or {})
        headers = headers or {}
        if data:
            headers['Content-Type'] = headers_as_param['Content-Type'] = 'application/json;charset=utf-8'
            data = json.dumps(merge_dicts(platform_codename, data)).encode('utf-8')
        query = merge_dicts(
            {} if data else platform_codename,
            {'$headers': json.dumps(headers_as_param)},
            params)
        response = self._download_json(
            'https://api.app.ertflix.gr/v%s/%s' % (str(api_version), method),
            video_id, fatal=False, query=query, data=data, headers=headers)
        if try_get(response, lambda x: x['Result']['Success']) is True:
            return response

    def _call_api_get_tiles(self, video_id, *tile_ids):
        requested_tile_ids = [video_id] + list(tile_ids)
        requested_tiles = [{'Id': tile_id} for tile_id in requested_tile_ids]
        tiles_response = self._call_api(
            video_id, method='Tile/GetTiles', api_version=2,
            data={'RequestedTiles': requested_tiles})
        tiles = try_get(tiles_response, lambda x: x['Tiles'], list) or []
        if tile_ids:
            if sorted([tile['Id'] for tile in tiles]) != sorted(requested_tile_ids):
                raise ExtractorError('Requested tiles not found', video_id=video_id)
            return tiles
        try:
            return next(tile for tile in tiles if tile['Id'] == video_id)
        except StopIteration:
            raise ExtractorError('No matching tile found', video_id=video_id)


class ERTFlixCodenameIE(ERTFlixBaseIE):
    IE_NAME = 'ertflix:codename'
    IE_DESC = 'ERTFLIX videos by codename'
    _VALID_URL = r'ertflix:(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'ertflix:monogramma-praxitelis-tzanoylinos',
        'md5': '5b9c2cd171f09126167e4082fc1dd0ef',
        'info_dict': {
            'id': 'monogramma-praxitelis-tzanoylinos',
            'ext': 'mp4',
            'title': 'md5:ef0b439902963d56c43ac83c3f41dd0e',
        },
    },
    ]

    def _extract_formats_and_subs(self, video_id, allow_none=True):
        media_info = self._call_api(video_id, codename=video_id)
        formats, subs = [], {}
        for media_file in try_get(media_info, lambda x: x['MediaFiles'], list) or []:
            for media in try_get(media_file, lambda x: x['Formats'], list) or []:
                fmt_url = url_or_none(try_get(media, lambda x: x['Url']))
                if not fmt_url:
                    continue
                ext = determine_ext(fmt_url)
                if ext == 'm3u8':
                    formats_, subs_ = self._extract_m3u8_formats_and_subtitles(
                        fmt_url, video_id, m3u8_id='hls', ext='mp4', fatal=False)
                elif ext == 'mpd':
                    formats_, subs_ = self._extract_mpd_formats_and_subtitles(
                        fmt_url, video_id, mpd_id='dash', fatal=False)
                else:
                    formats.append({
                        'url': fmt_url,
                        'format_id': str_or_none(media.get('Id')),
                    })
                    continue
                formats.extend(formats_)
                self._merge_subtitles(subs_, target=subs)

        if formats or not allow_none:
            self._sort_formats(formats)
        return formats, subs

    def _real_extract(self, url):
        video_id = self._match_id(url)

        formats, subs = self._extract_formats_and_subs(video_id)

        if formats:
            return {
                'id': video_id,
                'formats': formats,
                'subtitles': subs,
                'title': self._generic_title(url),
            }


class ERTFlixIE(ERTFlixBaseIE):
    IE_NAME = 'ertflix'
    IE_DESC = 'ERTFLIX videos'
    _VALID_URL = r'https?://www\.ertflix\.gr/(?:series|vod)/(?P<id>[a-z]{3}\.\d+)'
    _TESTS = [{
        'url': 'https://www.ertflix.gr/vod/vod.173258-aoratoi-ergates',
        'md5': '6479d5e60fd7e520b07ba5411dcdd6e7',
        'info_dict': {
            'id': 'aoratoi-ergates',
            'ext': 'mp4',
            'title': 'md5:c1433d598fbba0211b0069021517f8b4',
            'description': 'md5:01a64d113c31957eb7eb07719ab18ff4',
            'thumbnail': r're:https?://.+\.jpg',
            'episode_id': 'vod.173258',
            'timestamp': 1639648800,
            'upload_date': '20211216',
            'duration': 3166,
            'age_limit': 8,
        },
    }, {
        'url': 'https://www.ertflix.gr/series/ser.3448-monogramma',
        'info_dict': {
            'id': 'ser.3448',
            'age_limit': 8,
            'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.',
            'title': 'Μονόγραμμα',
        },
        'playlist_mincount': 64,
    }, {
        'url': 'https://www.ertflix.gr/series/ser.3448-monogramma?season=1',
        'info_dict': {
            'id': 'ser.3448',
            'age_limit': 8,
            'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.',
            'title': 'Μονόγραμμα',
        },
        'playlist_count': 22,
    }, {
        'url': 'https://www.ertflix.gr/series/ser.3448-monogramma?season=1&season=2021%20-%202022',
        'info_dict': {
            'id': 'ser.3448',
            'age_limit': 8,
            'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.',
            'title': 'Μονόγραμμα',
        },
        'playlist_mincount': 36,
    }, {
        'url': 'https://www.ertflix.gr/series/ser.164991-to-diktuo-1?season=1-9',
        'info_dict': {
            'id': 'ser.164991',
            'age_limit': 8,
            'description': 'Η πρώτη ελληνική εκπομπή με θεματολογία αποκλειστικά γύρω από το ίντερνετ.',
            'title': 'Το δίκτυο',
        },
        'playlist_mincount': 9,
    }]

    def _extract_episode(self, episode):
        codename = try_get(episode, lambda x: x['Codename'], compat_str)
        title = episode.get('Title')
        description = clean_html(dict_get(episode, ('ShortDescription', 'TinyDescription', )))
        if not codename or not title or not episode.get('HasPlayableStream', True):
            return
        thumbnail = next((
            url_or_none(thumb.get('Url'))
            for thumb in variadic(dict_get(episode, ('Images', 'Image')) or {})
            if thumb.get('IsMain')),
            None)
        return {
            '_type': 'url_transparent',
            'thumbnail': thumbnail,
            'id': codename,
            'episode_id': episode.get('Id'),
            'title': title,
            'alt_title': episode.get('Subtitle'),
            'description': description,
            'timestamp': parse_iso8601(episode.get('PublishDate')),
            'duration': episode.get('DurationSeconds'),
            'age_limit': self._parse_age_rating(episode),
            'url': 'ertflix:%s' % (codename, ),
        }

    @staticmethod
    def _parse_age_rating(info_dict):
        return parse_age_limit(
            info_dict.get('AgeRating')
            or (info_dict.get('IsAdultContent') and 18)
            or (info_dict.get('IsKidsContent') and 0))

    def _extract_series(self, video_id, season_titles=None, season_numbers=None):
        media_info = self._call_api(video_id, method='Tile/GetSeriesDetails', id=video_id)

        series = try_get(media_info, lambda x: x['Series'], dict) or {}
        series_info = {
            'age_limit': self._parse_age_rating(series),
            'title': series.get('Title'),
            'description': dict_get(series, ('ShortDescription', 'TinyDescription', )),
        }
        if season_numbers:
            season_titles = season_titles or []
            for season in try_get(series, lambda x: x['Seasons'], list) or []:
                if season.get('SeasonNumber') in season_numbers and season.get('Title'):
                    season_titles.append(season['Title'])

        def gen_episode(m_info, season_titles):
            for episode_group in try_get(m_info, lambda x: x['EpisodeGroups'], list) or []:
                if season_titles and episode_group.get('Title') not in season_titles:
                    continue
                episodes = try_get(episode_group, lambda x: x['Episodes'], list)
                if not episodes:
                    continue
                season_info = {
                    'season': episode_group.get('Title'),
                    'season_number': int_or_none(episode_group.get('SeasonNumber')),
                }
                try:
                    episodes = [(int(ep['EpisodeNumber']), ep) for ep in episodes]
                    episodes.sort()
                except (KeyError, ValueError):
                    episodes = enumerate(episodes, 1)
                for n, episode in episodes:
                    info = self._extract_episode(episode)
                    if info is None:
                        continue
                    info['episode_number'] = n
                    info.update(season_info)
                    yield info

        return self.playlist_result(
            gen_episode(media_info, season_titles), playlist_id=video_id, **series_info)

    def _real_extract(self, url):
        video_id = self._match_id(url)
        if video_id.startswith('ser.'):
            param_season = parse_qs(url).get('season', [None])
            param_season = [
                (have_number, int_or_none(v) if have_number else str_or_none(v))
                for have_number, v in
                [(int_or_none(ps) is not None, ps) for ps in param_season]
                if v is not None
            ]
            season_kwargs = {
                k: [v for is_num, v in param_season if is_num is c] or None
                for k, c in
                [('season_titles', False), ('season_numbers', True)]
            }
            return self._extract_series(video_id, **season_kwargs)

        return self._extract_episode(self._call_api_get_tiles(video_id))


class ERTWebtvEmbedIE(InfoExtractor):
    IE_NAME = 'ertwebtv:embed'
    IE_DESC = 'ert.gr webtv embedded videos'
    _BASE_PLAYER_URL_RE = re.escape('//www.ert.gr/webtv/live-uni/vod/dt-uni-vod.php')
    _VALID_URL = rf'https?:{_BASE_PLAYER_URL_RE}\?([^#]+&)?f=(?P<id>[^#&]+)'

    _TESTS = [{
        'url': 'https://www.ert.gr/webtv/live-uni/vod/dt-uni-vod.php?f=trailers/E2251_TO_DIKTYO_E09_16-01_1900.mp4&bgimg=/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg',
        'md5': 'f9e9900c25c26f4ecfbddbb4b6305854',
        'info_dict': {
            'id': 'trailers/E2251_TO_DIKTYO_E09_16-01_1900.mp4',
            'title': 'md5:914f06a73cd8b62fbcd6fb90c636e497',
            'ext': 'mp4',
            'thumbnail': 'https://program.ert.gr/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg'
        },
    }]

    @classmethod
    def _extract_urls(cls, webpage):
        EMBED_URL_RE = rf'(?:https?:)?{cls._BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+'
        EMBED_RE = rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{EMBED_URL_RE})(?P=_q1)'

        for mobj in re.finditer(EMBED_RE, webpage):
            url = unescapeHTML(mobj.group('url'))
            if not cls.suitable(url):
                continue
            yield url

    def _real_extract(self, url):
        video_id = self._match_id(url)
        formats, subs = self._extract_m3u8_formats_and_subtitles(
            f'https://mediastream.ert.gr/vodedge/_definst_/mp4:dvrorigin/{video_id}/playlist.m3u8',
            video_id, 'mp4')
        self._sort_formats(formats)
        thumbnail_id = parse_qs(url).get('bgimg', [None])[0]
        if thumbnail_id and not thumbnail_id.startswith('http'):
            thumbnail_id = f'https://program.ert.gr{thumbnail_id}'
        return {
            'id': video_id,
            'title': f'VOD - {video_id}',
            'thumbnail': thumbnail_id,
            'formats': formats,
            'subtitles': subs,
        }
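A distinctive quirk of the ERTFLIX API used above is that per-request headers are mirrored into the query string as a JSON-encoded $headers parameter. A minimal sketch of how such a query is built (values copied from _call_api; the GET comment is illustrative):

    import json

    platform = {'platformCodename': 'www'}
    headers_as_param = {'X-Api-Date-Format': 'iso', 'X-Api-Camel-Case': False}
    query = {**platform, '$headers': json.dumps(headers_as_param)}
    # e.g. GET https://api.app.ertflix.gr/v1/Player/AcquireContent
    # with `query` as URL parameters plus codename=<video_id>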
@@ -14,6 +14,10 @@ from .abcotvs import (
    ABCOTVSIE,
    ABCOTVSClipsIE,
)
from .abematv import (
    AbemaTVIE,
    AbemaTVTitleIE,
)
from .academicearth import AcademicEarthCourseIE
from .acast import (
    ACastIE,
@@ -64,6 +68,10 @@ from .anvato import AnvatoIE
from .aol import AolIE
from .allocine import AllocineIE
from .aliexpress import AliExpressLiveIE
from .alsace20tv import (
    Alsace20TVIE,
    Alsace20TVEmbedIE,
)
from .apa import APAIE
from .aparat import AparatIE
from .appleconnect import AppleConnectIE
@@ -87,6 +95,7 @@ from .arte import (
    ArteTVIE,
    ArteTVEmbedIE,
    ArteTVPlaylistIE,
    ArteTVCategoryIE,
)
from .arnes import ArnesIE
from .asiancrush import (
@@ -118,7 +127,7 @@ from .bandcamp import (
    BandcampIE,
    BandcampAlbumIE,
    BandcampWeeklyIE,
    BandcampMusicIE,
    BandcampUserIE,
)
from .bannedvideo import BannedVideoIE
from .bbc import (
@@ -142,6 +151,7 @@ from .bfmtv import (
)
from .bibeltv import BibelTVIE
from .bigflix import BigflixIE
from .bigo import BigoIE
from .bild import BildIE
from .bilibili import (
    BiliBiliIE,
@@ -194,6 +204,7 @@ from .byutv import BYUtvIE
from .c56 import C56IE
from .cableav import CableAVIE
from .callin import CallinIE
from .caltrans import CaltransIE
from .cam4 import CAM4IE
from .camdemy import (
    CamdemyIE,
@@ -300,6 +311,10 @@ from .commonprotocols import (
from .condenast import CondeNastIE
from .contv import CONtvIE
from .corus import CorusIE
from .cpac import (
    CPACIE,
    CPACPlaylistIE,
)
from .cozytv import CozyTVIE
from .cracked import CrackedIE
from .crackle import CrackleIE
@@ -314,7 +329,7 @@ from .crunchyroll import (
    CrunchyrollBetaIE,
    CrunchyrollBetaShowIE,
)
from .cspan import CSpanIE
from .cspan import CSpanIE, CSpanCongressIE
from .ctsnews import CtsNewsIE
from .ctv import CTVIE
from .ctvnews import CTVNewsIE
@@ -342,6 +357,7 @@ from .daum import (
    DaumPlaylistIE,
    DaumUserIE,
)
from .daystar import DaystarClipIE
from .dbtv import DBTVIE
from .dctp import DctpTvIE
from .deezer import (
@@ -361,9 +377,19 @@ from .dplay import (
    DPlayIE,
    DiscoveryPlusIE,
    HGTVDeIE,
    GoDiscoveryIE,
    TravelChannelIE,
    CookingChannelIE,
    HGTVUsaIE,
    FoodNetworkIE,
    InvestigationDiscoveryIE,
    DestinationAmericaIE,
    AmHistoryChannelIE,
    ScienceChannelIE,
    DIYNetworkIE,
    DiscoveryLifeIE,
    AnimalPlanetIE,
    TLCIE,
    DiscoveryPlusIndiaIE,
    DiscoveryNetworksDeIE,
    DiscoveryPlusItalyIE,
@@ -387,11 +413,6 @@ from .dumpert import DumpertIE
from .defense import DefenseGouvFrIE
from .digitalconcerthall import DigitalConcertHallIE
from .discovery import DiscoveryIE
from .discoverygo import (
    DiscoveryGoIE,
    DiscoveryGoPlaylistIE,
)
from .discoveryvr import DiscoveryVRIE
from .disney import DisneyIE
from .dispeak import DigitallySpeakingIE
from .doodstream import DoodStreamIE
@@ -433,6 +454,11 @@ from .eroprofile import (
    EroProfileIE,
    EroProfileAlbumIE,
)
from .ertgr import (
    ERTFlixCodenameIE,
    ERTFlixIE,
    ERTWebtvEmbedIE,
)
from .escapist import EscapistIE
from .espn import (
    ESPNIE,
@@ -462,6 +488,7 @@ from .faz import FazIE
from .fc2 import (
    FC2IE,
    FC2EmbedIE,
    FC2LiveIE,
)
from .fczenit import FczenitIE
from .filmmodu import FilmmoduIE
@@ -471,7 +498,6 @@ from .filmon import (
)
from .filmweb import FilmwebIE
from .firsttv import FirstTVIE
from .fivemin import FiveMinIE
from .fivetv import FiveTVIE
from .flickr import FlickrIE
from .folketinget import FolketingetIE
@@ -494,6 +520,7 @@ from .foxnews import (
    FoxNewsArticleIE,
)
from .foxsports import FoxSportsIE
from .fptplay import FptplayIE
from .franceculture import FranceCultureIE
from .franceinter import FranceInterIE
from .francetv import (
@@ -503,7 +530,6 @@ from .francetv import (
)
from .freesound import FreesoundIE
from .freespeech import FreespeechIE
from .freshlive import FreshLiveIE
from .frontendmasters import (
    FrontendMastersIE,
    FrontendMastersLessonIE,
@@ -538,7 +564,10 @@ from .gazeta import GazetaIE
from .gdcvault import GDCVaultIE
from .gedidigital import GediDigitalIE
from .generic import GenericIE
from .gettr import GettrIE
from .gettr import (
    GettrIE,
    GettrStreamingIE,
)
from .gfycat import GfycatIE
from .giantbomb import GiantBombIE
from .giga import GigaIE
@@ -575,7 +604,6 @@ from .hidive import HiDiveIE
from .historicfilms import HistoricFilmsIE
from .hitbox import HitboxIE, HitboxLiveIE
from .hitrecord import HitRecordIE
from .hornbunny import HornBunnyIE
from .hotnewhiphop import HotNewHipHopIE
from .hotstar import (
    HotStarIE,
@@ -645,7 +673,6 @@ from .iqiyi import (
    IqIE,
    IqAlbumIE
)
from .ir90tv import Ir90TvIE
from .itv import (
    ITVIE,
    ITVBTCCIE,
@@ -667,7 +694,6 @@ from .joj import JojIE
from .jwplatform import JWPlatformIE
from .kakao import KakaoIE
from .kaltura import KalturaIE
from .kankan import KankanIE
from .karaoketv import KaraoketvIE
from .karrierevideos import KarriereVideosIE
from .keezmovies import KeezMoviesIE
@@ -823,6 +849,7 @@ from .microsoftvirtualacademy import (
from .mildom import (
    MildomIE,
    MildomVodIE,
    MildomClipIE,
    MildomUserVodIE,
)
from .minds import (
@@ -880,7 +907,14 @@ from .mtv import (
    MTVItaliaProgrammaIE,
)
from .muenchentv import MuenchenTVIE
from .murrtube import MurrtubeIE, MurrtubeUserIE
from .musescore import MuseScoreIE
from .musicdex import (
    MusicdexSongIE,
    MusicdexAlbumIE,
    MusicdexArtistIE,
    MusicdexPlaylistIE,
)
from .mwave import MwaveIE, MwaveMeetGreetIE
from .mxplayer import (
    MxplayerIE,
@@ -968,6 +1002,7 @@ from .nexx import (
    NexxIE,
    NexxEmbedIE,
)
from .nfb import NFBIE
from .nfhsnetwork import NFHSNetworkIE
from .nfl import (
    NFLIE,
@@ -976,6 +1011,9 @@ from .nfl import (
from .nhk import (
    NhkVodIE,
    NhkVodProgramIE,
    NhkForSchoolBangumiIE,
    NhkForSchoolSubjectIE,
    NhkForSchoolProgramListIE,
)
from .nhl import NHLIE
from .nick import (
@@ -985,14 +1023,16 @@ from .nick import (
    NickNightIE,
    NickRuIE,
)

from .niconico import (
    NiconicoIE,
    NiconicoPlaylistIE,
    NiconicoUserIE,
    NiconicoSeriesIE,
    NiconicoHistoryIE,
    NicovideoSearchDateIE,
    NicovideoSearchIE,
    NicovideoSearchURLIE,
    NicovideoTagURLIE,
)
from .ninecninemedia import (
    NineCNineMediaIE,
@@ -1124,6 +1164,7 @@ from .patreon import (
)
from .pbs import PBSIE
from .pearvideo import PearVideoIE
from .peekvids import PeekVidsIE, PlayVidsIE
from .peertube import (
    PeerTubeIE,
    PeerTubePlaylistIE,
@@ -1142,6 +1183,7 @@ from .periscope import (
from .philharmoniedeparis import PhilharmonieDeParisIE
from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE
from .piapro import PiaproIE
from .picarto import (
    PicartoIE,
    PicartoVodIE,
@@ -1303,11 +1345,14 @@ from .reuters import ReutersIE
from .reverbnation import ReverbNationIE
from .rice import RICEIE
from .rmcdecouverte import RMCDecouverteIE
from .ro220 import Ro220IE
from .rockstargames import RockstarGamesIE
from .rokfin import (
    RokfinIE,
    RokfinStackIE,
    RokfinChannelIE,
)
from .roosterteeth import RoosterTeethIE, RoosterTeethSeriesIE
from .rottentomatoes import RottenTomatoesIE
from .roxwel import RoxwelIE
from .rozhlas import RozhlasIE
from .rtbf import RTBFIE
from .rte import RteIE, RteRadioIE
@@ -1358,9 +1403,17 @@ from .megatvcom import (
    MegaTVComIE,
    MegaTVComEmbedIE,
)
from .ant1newsgr import (
    Ant1NewsGrWatchIE,
    Ant1NewsGrArticleIE,
    Ant1NewsGrEmbedIE,
)
from .rutv import RUTVIE
from .ruutu import RuutuIE
from .ruv import RuvIE
from .ruv import (
    RuvIE,
    RuvSpilaIE
)
from .safari import (
    SafariIE,
    SafariApiIE,
@@ -1557,6 +1610,7 @@ from .tele13 import Tele13IE
from .telebruxelles import TeleBruxellesIE
from .telecinco import TelecincoIE
from .telegraaf import TelegraafIE
from .telegram import TelegramEmbedIE
from .telemb import TeleMBIE
from .telemundo import TelemundoIE
from .telequebec import (
@@ -1578,7 +1632,6 @@ from .theplatform import (
    ThePlatformIE,
    ThePlatformFeedIE,
)
from .thescene import TheSceneIE
from .thestar import TheStarIE
from .thesun import TheSunIE
from .theta import (
@@ -1600,6 +1653,7 @@ from .tiktok import (
    TikTokSoundIE,
    TikTokEffectIE,
    TikTokTagIE,
    TikTokVMIE,
    DouyinIE,
)
from .tinypic import TinyPicIE
@@ -1797,6 +1851,10 @@ from .vice import (
from .vidbit import VidbitIE
from .viddler import ViddlerIE
from .videa import VideaIE
from .videocampus_sachsen import (
    VideocampusSachsenIE,
    VideocampusSachsenEmbedIE,
)
from .videodetective import VideoDetectiveIE
from .videofyme import VideofyMeIE
from .videomore import (
@@ -1829,7 +1887,10 @@ from .vimeo import (
    VimeoWatchLaterIE,
    VHXEmbedIE,
)
from .vimm import VimmIE
from .vimm import (
    VimmIE,
    VimmRecordingIE,
)
from .vimple import VimpleIE
from .vine import (
    VineIE,
@@ -1880,7 +1941,6 @@ from .vrv import (
from .vshare import VShareIE
from .vtm import VTMIE
from .medialaan import MedialaanIE
from .vube import VubeIE
from .vuclip import VuClipIE
from .vupload import VuploadIE
from .vvvvid import (
@@ -1952,6 +2012,7 @@ from .ximalaya import (
    XimalayaIE,
    XimalayaAlbumIE
)
from .xinpianchang import XinpianchangIE
from .xminus import XMinusIE
from .xnxx import XNXXIE
from .xstream import XstreamIE
@@ -1976,6 +2037,7 @@ from .yandexmusic import (
)
from .yandexvideo import (
    YandexVideoIE,
    YandexVideoPreviewIE,
    ZenYandexIE,
    ZenYandexChannelIE,
)
@@ -2008,6 +2070,7 @@ from .youtube import (
    YoutubeSearchDateIE,
    YoutubeSearchIE,
    YoutubeSearchURLIE,
    YoutubeMusicSearchURLIE,
    YoutubeSubscriptionsIE,
    YoutubeTruncatedIDIE,
    YoutubeTruncatedURLIE,
@@ -1,18 +1,23 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
WebSocketsWrapper,
|
||||
has_websockets,
|
||||
js_to_json,
|
||||
sanitized_Request,
|
||||
std_headers,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -82,41 +87,33 @@ class FC2IE(InfoExtractor):
|
||||
self._downloader.cookiejar.clear_session_cookies() # must clear
|
||||
self._login()
|
||||
|
||||
title = 'FC2 video %s' % video_id
|
||||
thumbnail = None
|
||||
title, thumbnail, description = None, None, None
|
||||
if webpage is not None:
|
||||
title = self._og_search_title(webpage)
|
||||
title = self._html_search_regex(
|
||||
(r'<h2\s+class="videoCnt_title">([^<]+?)</h2>',
|
||||
r'\s+href="[^"]+"\s*title="([^"]+?)"\s*rel="nofollow">\s*<img',
|
||||
# there's two matches in the webpage
|
||||
r'\s+href="[^"]+"\s*title="([^"]+?)"\s*rel="nofollow">\s*\1'),
|
||||
webpage,
|
||||
'title', fatal=False)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
refer = url.replace('/content/', '/a/content/') if '/a/content/' not in url else url
|
||||
description = self._og_search_description(webpage, default=None)
|
||||
|
||||
mimi = hashlib.md5((video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest()
|
||||
|
||||
info_url = (
|
||||
'http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&'.
|
||||
format(video_id, mimi, compat_urllib_request.quote(refer, safe=b'').replace('.', '%2E')))
|
||||
|
||||
info_webpage = self._download_webpage(
|
||||
info_url, video_id, note='Downloading info page')
|
||||
info = compat_urlparse.parse_qs(info_webpage)
|
||||
|
||||
if 'err_code' in info:
|
||||
# most of the time we can still download wideo even if err_code is 403 or 602
|
||||
self.report_warning(
|
||||
'Error code was: %s... but still trying' % info['err_code'][0])
|
||||
|
||||
if 'filepath' not in info:
|
||||
raise ExtractorError('Cannot download file. Are you logged in?')
|
||||
|
||||
video_url = info['filepath'][0] + '?mid=' + info['mid'][0]
|
||||
title_info = info.get('title')
|
||||
if title_info:
|
||||
title = title_info[0]
|
||||
vidplaylist = self._download_json(
|
||||
'https://video.fc2.com/api/v3/videoplaylist/%s?sh=1&fs=0' % video_id, video_id,
|
||||
note='Downloading info page')
|
||||
vid_url = traverse_obj(vidplaylist, ('playlist', 'nq'))
|
||||
if not vid_url:
|
||||
raise ExtractorError('Unable to extract video URL')
|
||||
vid_url = urljoin('https://video.fc2.com/', vid_url)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'ext': 'flv',
|
||||
'url': vid_url,
|
||||
'ext': 'mp4',
|
||||
'protocol': 'm3u8_native',
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
@@ -157,3 +154,146 @@ class FC2EmbedIE(InfoExtractor):
            'title': title,
            'thumbnail': thumbnail,
        }


class FC2LiveIE(InfoExtractor):
    _VALID_URL = r'https?://live\.fc2\.com/(?P<id>\d+)'
    IE_NAME = 'fc2:live'

    _TESTS = [{
        'url': 'https://live.fc2.com/57892267/',
        'info_dict': {
            'id': '57892267',
            'title': 'どこまで・・・',
            'uploader': 'あつあげ',
            'uploader_id': '57892267',
            'thumbnail': r're:https?://.+fc2.+',
        },
        'skip': 'livestream',
    }]

    def _real_extract(self, url):
        if not has_websockets:
            raise ExtractorError('websockets library is not available. Please install it.', expected=True)
        video_id = self._match_id(url)
        webpage = self._download_webpage('https://live.fc2.com/%s/' % video_id, video_id)

        self._set_cookie('live.fc2.com', 'js-player_size', '1')

        member_api = self._download_json(
            'https://live.fc2.com/api/memberApi.php', video_id, data=urlencode_postdata({
                'channel': '1',
                'profile': '1',
                'user': '1',
                'streamid': video_id
            }), note='Requesting member info')

        control_server = self._download_json(
            'https://live.fc2.com/api/getControlServer.php', video_id, note='Downloading ControlServer data',
            data=urlencode_postdata({
                'channel_id': video_id,
                'mode': 'play',
                'orz': '',
                'channel_version': member_api['data']['channel_data']['version'],
                'client_version': '2.1.0\n [1]',
                'client_type': 'pc',
                'client_app': 'browser_hls',
                'ipv6': '',
            }), headers={'X-Requested-With': 'XMLHttpRequest'})
        self._set_cookie('live.fc2.com', 'l_ortkn', control_server['orz_raw'])

        ws_url = update_url_query(control_server['url'], {'control_token': control_server['control_token']})
        playlist_data = None

        self.to_screen('%s: Fetching HLS playlist info via WebSocket' % video_id)
        ws = WebSocketsWrapper(ws_url, {
            # str() of the SimpleCookie renders 'Set-Cookie: k=v; ...'; [12:] drops that prefix
            'Cookie': str(self._get_cookies('https://live.fc2.com/'))[12:],
            'Origin': 'https://live.fc2.com',
            'Accept': '*/*',
            'User-Agent': std_headers['User-Agent'],
        })
        ws.__enter__()

        self.write_debug('Sending HLS server request')

        while True:
            recv = ws.recv()
            if not recv:
                continue
            data = self._parse_json(recv, video_id, fatal=False)
            if not data or not isinstance(data, dict):
                continue

            if data.get('name') == 'connect_complete':
                break
        ws.send(r'{"name":"get_hls_information","arguments":{},"id":1}')

        while True:
            recv = ws.recv()
            if not recv:
                continue
            data = self._parse_json(recv, video_id, fatal=False)
            if not data or not isinstance(data, dict):
                continue
            if data.get('name') == '_response_' and data.get('id') == 1:
                self.write_debug('Goodbye.')
                playlist_data = data
                break
            elif self._downloader.params.get('verbose', False):
                if len(recv) > 100:
                    recv = recv[:100] + '...'
                self.to_screen('[debug] Server said: %s' % recv)

        if not playlist_data:
            raise ExtractorError('Unable to fetch HLS playlist info via WebSocket')

        formats = []
        for name, playlists in playlist_data['arguments'].items():
            if not isinstance(playlists, list):
                continue
            for pl in playlists:
                if pl.get('status') == 0 and 'master_playlist' in pl.get('url'):
                    formats.extend(self._extract_m3u8_formats(
                        pl['url'], video_id, ext='mp4', m3u8_id=name, live=True,
                        headers={
                            'Origin': 'https://live.fc2.com',
                            'Referer': url,
                        }))

        self._sort_formats(formats)
        for fmt in formats:
            fmt.update({
                'protocol': 'fc2_live',
                'ws': ws,
            })

        title = self._html_search_meta(('og:title', 'twitter:title'), webpage, 'live title', fatal=False)
        if not title:
            title = self._html_extract_title(webpage, 'html title', fatal=False)
            if title:
                # remove service name in <title>
                title = re.sub(r'\s+-\s+.+$', '', title)
        uploader = None
        if title:
            match = self._search_regex(r'^(.+?)\s*\[(.+?)\]$', title, 'title and uploader', default=None, group=(1, 2))
            if match and all(match):
                title, uploader = match

        live_info_view = self._search_regex(r'(?s)liveInfoView\s*:\s*({.+?}),\s*premiumStateView', webpage, 'user info', fatal=False) or None
        if live_info_view:
            # remove jQuery code from object literal
            live_info_view = re.sub(r'\$\(.+?\)[^,]+,', '"",', live_info_view)
            live_info_view = self._parse_json(js_to_json(live_info_view), video_id)

        return {
            'id': video_id,
            'title': title or traverse_obj(live_info_view, 'title'),
            'description': self._html_search_meta(
                ('og:description', 'twitter:description'),
                webpage, 'live description', fatal=False) or traverse_obj(live_info_view, 'info'),
            'formats': formats,
            'uploader': uploader or traverse_obj(live_info_view, 'name'),
            'uploader_id': video_id,
            'thumbnail': traverse_obj(live_info_view, 'thumb'),
            'is_live': True,
        }
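    # Editorial sketch (assuming the message names used above): the extractor
    # keeps the WebSocket open and hands it to the 'fc2_live' downloader via
    # the 'ws' format field. The exchange it performs is, in order:
    #   <- wait until {"name": "connect_complete"} arrives
    #   -> send {"name": "get_hls_information", "arguments": {}, "id": 1}
    #   <- read {"name": "_response_", "id": 1, "arguments": {...playlists...}}
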
@@ -1,54 +0,0 @@
from __future__ import unicode_literals

from .common import InfoExtractor


class FiveMinIE(InfoExtractor):
    IE_NAME = '5min'
    _VALID_URL = r'(?:5min:|https?://(?:[^/]*?5min\.com/|delivery\.vidible\.tv/aol)(?:(?:Scripts/PlayerSeed\.js|playerseed/?)?\?.*?playList=)?)(?P<id>\d+)'

    _TESTS = [
        {
            # From http://www.engadget.com/2013/11/15/ipad-mini-retina-display-review/
            'url': 'http://pshared.5min.com/Scripts/PlayerSeed.js?sid=281&width=560&height=345&playList=518013791',
            'md5': '4f7b0b79bf1a470e5004f7112385941d',
            'info_dict': {
                'id': '518013791',
                'ext': 'mp4',
                'title': 'iPad Mini with Retina Display Review',
                'description': 'iPad mini with Retina Display review',
                'duration': 177,
                'uploader': 'engadget',
                'upload_date': '20131115',
                'timestamp': 1384515288,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            }
        },
        {
            # From http://on.aol.com/video/how-to-make-a-next-level-fruit-salad-518086247
            'url': '5min:518086247',
            'md5': 'e539a9dd682c288ef5a498898009f69e',
            'info_dict': {
                'id': '518086247',
                'ext': 'mp4',
                'title': 'How to Make a Next-Level Fruit Salad',
                'duration': 184,
            },
            'skip': 'no longer available',
        },
        {
            'url': 'http://embed.5min.com/518726732/',
            'only_matching': True,
        },
        {
            'url': 'http://delivery.vidible.tv/aol?playList=518013791',
            'only_matching': True,
        }
    ]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        return self.url_result('aol-video:%s' % video_id)
yt_dlp/extractor/fptplay.py (new file, 102 lines)
@@ -0,0 +1,102 @@
# coding: utf-8
from __future__ import unicode_literals

import hashlib
import time
import urllib.parse

from .common import InfoExtractor
from ..utils import (
    join_nonempty,
)


class FptplayIE(InfoExtractor):
    _VALID_URL = r'https?://fptplay\.vn/(?P<type>xem-video)/[^/]+\-(?P<id>\w+)(?:/tap-(?P<episode>[^/]+)?/?(?:[?#]|$)|)'
    _GEO_COUNTRIES = ['VN']
    IE_NAME = 'fptplay'
    IE_DESC = 'fptplay.vn'
    _TESTS = [{
        'url': 'https://fptplay.vn/xem-video/nhan-duyen-dai-nhan-xin-dung-buoc-621a123016f369ebbde55945',
        'md5': 'ca0ee9bc63446c0c3e9a90186f7d6b33',
        'info_dict': {
            'id': '621a123016f369ebbde55945',
            'ext': 'mp4',
            'title': 'Nhân Duyên Đại Nhân Xin Dừng Bước - Ms. Cupid In Love',
            'description': 'md5:23cf7d1ce0ade8e21e76ae482e6a8c6c',
        },
    }, {
        'url': 'https://fptplay.vn/xem-video/ma-toi-la-dai-gia-61f3aa8a6b3b1d2e73c60eb5/tap-3',
        'md5': 'b35be968c909b3e4e1e20ca45dd261b1',
        'info_dict': {
            'id': '61f3aa8a6b3b1d2e73c60eb5',
            'ext': 'mp4',
            'title': 'Má Tôi Là Đại Gia - 3',
            'description': 'md5:ff8ba62fb6e98ef8875c42edff641d1c',
        },
    }, {
        'url': 'https://fptplay.vn/xem-video/nha-co-chuyen-hi-alls-well-ends-well-1997-6218995f6af792ee370459f0',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        type_url, video_id, episode = self._match_valid_url(url).group('type', 'id', 'episode')
        webpage = self._download_webpage(url, video_id=video_id, fatal=False)
        info = self._download_json(self.get_api_with_st_token(video_id, episode or 0), video_id)
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(info['data']['url'], video_id, 'mp4')
        self._sort_formats(formats)
        return {
            'id': video_id,
            'title': join_nonempty(
                self._html_search_meta(('og:title', 'twitter:title'), webpage), episode, delim=' - '),
            'description': self._html_search_meta(['og:description', 'twitter:description'], webpage),
            'formats': formats,
            'subtitles': subtitles,
        }

    def get_api_with_st_token(self, video_id, episode):
        path = f'/api/v6.2_w/stream/vod/{video_id}/{episode}/auto_vip'
        timestamp = int(time.time()) + 10800

        t = hashlib.md5(f'WEBv6Dkdsad90dasdjlALDDDS{timestamp}{path}'.encode()).hexdigest().upper()
        r = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
        n = [int(f'0x{t[2 * o: 2 * o + 2]}', 16) for o in range(len(t) // 2)]

        def convert(e):
            t = ''
            n = 0
            i = [0, 0, 0]
            a = [0, 0, 0, 0]
            s = len(e)
            c = 0
            for z in range(s, 0, -1):
                if n <= 3:
                    i[n] = e[c]
                    n += 1
                    c += 1
                if 3 == n:
                    a[0] = (252 & i[0]) >> 2
                    a[1] = ((3 & i[0]) << 4) + ((240 & i[1]) >> 4)
                    a[2] = ((15 & i[1]) << 2) + ((192 & i[2]) >> 6)
                    a[3] = (63 & i[2])
                    for v in range(4):
                        t += r[a[v]]
                    n = 0
            if n:
                for o in range(n, 3):
                    i[o] = 0

                for o in range(n + 1):
                    a[0] = (252 & i[0]) >> 2
                    a[1] = ((3 & i[0]) << 4) + ((240 & i[1]) >> 4)
                    a[2] = ((15 & i[1]) << 2) + ((192 & i[2]) >> 6)
                    a[3] = (63 & i[2])
                    t += r[a[o]]
                n += 1
                while n < 3:
                    t += ''
                    n += 1
            return t

        st_token = convert(n).replace('+', '-').replace('/', '_').replace('=', '')
        return f'https://api.fptplay.net{path}?{urllib.parse.urlencode({"st": st_token, "e": timestamp})}'
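    # Editorial note (not part of the original file): convert() above is a
    # hand-rolled base64 encoder over the MD5 digest bytes, with '+'/'/' then
    # swapped for '-'/'_' and '=' padding stripped. Assuming only the stdlib,
    # the same st_token could plausibly be produced with:
    #
    #   import base64, hashlib
    #   digest = hashlib.md5(f'WEBv6Dkdsad90dasdjlALDDDS{timestamp}{path}'.encode()).digest()
    #   st_token = base64.urlsafe_b64encode(digest).decode().rstrip('=')
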
@@ -1,80 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    ExtractorError,
    int_or_none,
    try_get,
    unified_timestamp,
)


class FreshLiveIE(InfoExtractor):
    _VALID_URL = r'https?://freshlive\.tv/[^/]+/(?P<id>\d+)'
    _TEST = {
        'url': 'https://freshlive.tv/satotv/74712',
        'md5': '9f0cf5516979c4454ce982df3d97f352',
        'info_dict': {
            'id': '74712',
            'ext': 'mp4',
            'title': 'テスト',
            'description': 'テスト',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 1511,
            'timestamp': 1483619655,
            'upload_date': '20170105',
            'uploader': 'サトTV',
            'uploader_id': 'satotv',
            'view_count': int,
            'comment_count': int,
            'is_live': False,
        }
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        options = self._parse_json(
            self._search_regex(
                r'window\.__CONTEXT__\s*=\s*({.+?});\s*</script>',
                webpage, 'initial context'),
            video_id)

        info = options['context']['dispatcher']['stores']['ProgramStore']['programs'][video_id]

        title = info['title']

        if info.get('status') == 'upcoming':
            raise ExtractorError('Stream %s is upcoming' % video_id, expected=True)

        stream_url = info.get('liveStreamUrl') or info['archiveStreamUrl']

        is_live = info.get('liveStreamUrl') is not None

        formats = self._extract_m3u8_formats(
            stream_url, video_id, 'mp4',
            'm3u8_native', m3u8_id='hls')

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': info.get('description'),
            'thumbnail': info.get('thumbnailUrl'),
            'duration': int_or_none(info.get('airTime')),
            'timestamp': unified_timestamp(info.get('createdAt')),
            'uploader': try_get(
                info, lambda x: x['channel']['title'], compat_str),
            'uploader_id': try_get(
                info, lambda x: x['channel']['code'], compat_str),
            'uploader_url': try_get(
                info, lambda x: x['channel']['permalink'], compat_str),
            'view_count': int_or_none(info.get('viewCount')),
            'comment_count': int_or_none(info.get('commentCount')),
            'tags': info.get('tags', []),
            'is_live': is_live,
        }
@@ -252,9 +252,9 @@ class FrontendMastersCourseIE(FrontendMastersPageBaseIE):
        entries = []
        for lesson in lessons:
            lesson_name = lesson.get('slug')
            if not lesson_name:
                continue
            lesson_id = lesson.get('hash') or lesson.get('statsId')
            if not lesson_id or not lesson_name:
                continue
            entries.append(self._extract_lesson(chapters, lesson_id, lesson))

        title = course.get('title')

@@ -1,12 +1,12 @@
# coding: utf-8
from __future__ import unicode_literals

from ..utils import HEADRequest
from .common import InfoExtractor


class FujiTVFODPlus7IE(InfoExtractor):
    _VALID_URL = r'https?://fod\.fujitv\.co\.jp/title/[0-9a-z]{4}/(?P<id>[0-9a-z]+)'
    _BASE_URL = 'http://i.fod.fujitv.co.jp/'
    _VALID_URL = r'https?://fod\.fujitv\.co\.jp/title/(?P<sid>[0-9a-z]{4})/(?P<id>[0-9a-z]+)'
    _BASE_URL = 'https://i.fod.fujitv.co.jp/'
    _BITRATE_MAP = {
        300: (320, 180),
        800: (640, 360),
@@ -16,33 +16,58 @@ class FujiTVFODPlus7IE(InfoExtractor):
    }

    _TESTS = [{
        'url': 'https://fod.fujitv.co.jp/title/5d40/5d40810075',
        'url': 'https://fod.fujitv.co.jp/title/5d40/5d40110076',
        'info_dict': {
            'id': '5d40810075',
            'title': '5d40810075',
            'id': '5d40110076',
            'ext': 'mp4',
            'format_id': '4000',
            'thumbnail': 'http://i.fod.fujitv.co.jp/pc/image/wbtn/wbtn_5d40810075.jpg'
            'title': '#1318 『まる子、まぼろしの洋館を見る』の巻',
            'series': 'ちびまる子ちゃん',
            'series_id': '5d40',
            'description': 'md5:b3f51dbfdda162ac4f789e0ff4d65750',
            'thumbnail': 'https://i.fod.fujitv.co.jp/img/program/5d40/episode/5d40110076_a.jpg',
        },
        'skip': 'Expires after a week'
    }, {
        'url': 'https://fod.fujitv.co.jp/title/5d40/5d40810083',
        'info_dict': {
            'id': '5d40810083',
            'ext': 'mp4',
            'title': '#1324 『まる子とオニの子』の巻/『結成!2月をムダにしない会』の巻',
            'description': 'md5:3972d900b896adc8ab1849e310507efa',
            'series': 'ちびまる子ちゃん',
            'series_id': '5d40',
            'thumbnail': 'https://i.fod.fujitv.co.jp/img/program/5d40/episode/5d40810083_a.jpg'},
        'skip': 'Video available only in one week'
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        formats = self._extract_m3u8_formats(
            self._BASE_URL + 'abr/tv_android/%s.m3u8' % video_id, video_id, 'mp4')
        for f in formats:
            wh = self._BITRATE_MAP.get(f.get('tbr'))
            if wh:
                f.update({
                    'width': wh[0],
                    'height': wh[1],
                })
        self._sort_formats(formats)
        series_id, video_id = self._match_valid_url(url).groups()
        self._request_webpage(HEADRequest(url), video_id)
        json_info = {}
        token = self._get_cookies(url).get('CT')
        if token:
            json_info = self._download_json('https://fod-sp.fujitv.co.jp/apps/api/episode/detail/?ep_id=%s&is_premium=false' % video_id, video_id, headers={'x-authorization': f'Bearer {token.value}'}, fatal=False)
        else:
            self.report_warning(f'The token cookie is needed to extract video metadata. {self._LOGIN_HINTS["cookies"]}')
        formats, subtitles = [], {}
        src_json = self._download_json(f'{self._BASE_URL}abrjson_v2/tv_android/{video_id}', video_id)
        for src in src_json['video_selector']:
            if not src.get('url'):
                continue
            fmt, subs = self._extract_m3u8_formats_and_subtitles(src['url'], video_id, 'mp4')
            for f in fmt:
                f.update(dict(zip(('height', 'width'),
                                  self._BITRATE_MAP.get(f.get('tbr'), ()))))
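                # Editorial note: _BITRATE_MAP values read as (width, height)
                # pairs (the removed code above assigned wh[0] to 'width'), so
                # zipping them against ('height', 'width') here appears to
                # swap the two fields for these formats.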
            formats.extend(fmt)
            subtitles = self._merge_subtitles(subtitles, subs)
        self._sort_formats(formats, ['tbr'])

        return {
            'id': video_id,
            'title': video_id,
            'title': json_info.get('ep_title'),
            'series': json_info.get('lu_title'),
            'series_id': series_id,
            'description': json_info.get('ep_description'),
            'formats': formats,
            'thumbnail': self._BASE_URL + 'pc/image/wbtn/wbtn_%s.jpg' % video_id,
            'subtitles': subtitles,
            'thumbnail': f'{self._BASE_URL}img/program/{series_id}/episode/{video_id}_a.jpg',
        }

@@ -103,6 +103,7 @@ from .videopress import VideoPressIE
from .rutube import RutubeIE
from .glomex import GlomexEmbedIE
from .megatvcom import MegaTVComEmbedIE
from .ant1newsgr import Ant1NewsGrEmbedIE
from .limelight import LimelightBaseIE
from .anvato import AnvatoIE
from .washingtonpost import WashingtonPostIE
@@ -140,6 +141,7 @@ from .medialaan import MedialaanIE
from .simplecast import SimplecastIE
from .wimtv import WimTVIE
from .tvopengr import TVOpenGrEmbedIE
from .ertgr import ERTWebtvEmbedIE
from .tvp import TVPEmbedIE
from .blogger import BloggerIE
from .mainstreaming import MainStreamingIE
@@ -212,7 +214,7 @@ class GenericIE(InfoExtractor):
        {
            'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
            'info_dict': {
                'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
                'id': 'https://phihag.de/2014/youtube-dl/rss2.xml',
                'title': 'Zero Punctuation',
                'description': 're:.*groundbreaking video review series.*'
            },
@@ -257,6 +259,9 @@ class GenericIE(InfoExtractor):
                    'episode_number': 1,
                    'season_number': 1,
                    'age_limit': 0,
                    'season': 'Season 1',
                    'direct': True,
                    'episode': 'Episode 1',
                },
            }],
            'params': {
@@ -273,6 +278,16 @@ class GenericIE(InfoExtractor):
            },
            'playlist_mincount': 100,
        },
        # RSS feed with guid
        {
            'url': 'https://www.omnycontent.com/d/playlist/a7b4f8fe-59d9-4afc-a79a-a90101378abf/bf2c1d80-3656-4449-9d00-a903004e8f84/efbff746-e7c1-463a-9d80-a903004e8f8f/podcast.rss',
            'info_dict': {
                'id': 'https://www.omnycontent.com/d/playlist/a7b4f8fe-59d9-4afc-a79a-a90101378abf/bf2c1d80-3656-4449-9d00-a903004e8f84/efbff746-e7c1-463a-9d80-a903004e8f8f/podcast.rss',
                'description': 'md5:be809a44b63b0c56fb485caf68685520',
                'title': 'The Little Red Podcast',
            },
            'playlist_mincount': 76,
        },
        # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
        {
            'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
@@ -1455,24 +1470,6 @@ class GenericIE(InfoExtractor):
                'duration': 45.115,
            },
        },
        # 5min embed
        {
            'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
            'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
            'info_dict': {
                'id': '518726732',
                'ext': 'mp4',
                'title': 'Facebook Creates "On This Day" | Crunch Report',
                'description': 'Amazon updates Fire TV line, Tesla\'s Model X spotted in the wild',
                'timestamp': 1427237531,
                'uploader': 'Crunch Report',
                'upload_date': '20150324',
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        # Crooks and Liars embed
        {
            'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
@@ -1923,6 +1920,15 @@ class GenericIE(InfoExtractor):
                },
            }]
        },
        {
            'url': 'https://www.ertnews.gr/video/manolis-goyalles-o-anthropos-piso-apo-ti-diadiktyaki-vasilopita/',
            'info_dict': {
                'id': '2022/tv/news-themata-ianouarios/20220114-apotis6-gouales-pita.mp4',
                'ext': 'mp4',
                'title': 'md5:df64f5b61c06d0e9556c0cdd5cf14464',
                'thumbnail': 'https://www.ert.gr/themata/photos/2021/20220114-apotis6-gouales-pita.jpg',
            },
        },
        {
            # ThePlatform embedded with whitespaces in URLs
            'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
@@ -2526,6 +2532,9 @@ class GenericIE(InfoExtractor):
                if not next_url:
                    continue

                if it.find('guid').text is not None:
                    next_url = smuggle_url(next_url, {'force_videoid': it.find('guid').text})

                def itunes(key):
                    return xpath_text(
                        it, xpath_with_ns('./itunes:%s' % key, NS_MAP),
@@ -3327,12 +3336,6 @@ class GenericIE(InfoExtractor):
        if mobj is not None:
            return self.url_result(mobj.group('url'))

        # Look for 5min embeds
        mobj = re.search(
            r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
        if mobj is not None:
            return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')

        # Look for Crooks and Liars embeds
        mobj = re.search(
            r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
@@ -3542,6 +3545,12 @@ class GenericIE(InfoExtractor):
            return self.playlist_from_matches(
                megatvcom_urls, video_id, video_title, ie=MegaTVComEmbedIE.ie_key())

        # Look for ant1news.gr embeds
        ant1newsgr_urls = list(Ant1NewsGrEmbedIE._extract_urls(webpage))
        if ant1newsgr_urls:
            return self.playlist_from_matches(
                ant1newsgr_urls, video_id, video_title, ie=Ant1NewsGrEmbedIE.ie_key())

        # Look for WashingtonPost embeds
        wapo_urls = WashingtonPostIE._extract_urls(webpage)
        if wapo_urls:
@@ -3693,6 +3702,13 @@ class GenericIE(InfoExtractor):
        if tvopengr_urls:
            return self.playlist_from_matches(tvopengr_urls, video_id, video_title, ie=TVOpenGrEmbedIE.ie_key())

        # Look for ert.gr webtv embeds
        ertwebtv_urls = list(ERTWebtvEmbedIE._extract_urls(webpage))
        if len(ertwebtv_urls) == 1:
            return self.url_result(self._proto_relative_url(ertwebtv_urls[0]), video_title=video_title, url_transparent=True)
        elif ertwebtv_urls:
            return self.playlist_from_matches(ertwebtv_urls, video_id, video_title, ie=ERTWebtvEmbedIE.ie_key())

        tvp_urls = TVPEmbedIE._extract_urls(webpage)
        if tvp_urls:
            return self.playlist_from_matches(tvp_urls, video_id, video_title, ie=TVPEmbedIE.ie_key())
@@ -3798,13 +3814,16 @@ class GenericIE(InfoExtractor):

        # Looking for http://schema.org/VideoObject
        json_ld = self._search_json_ld(webpage, video_id, default={})
        if json_ld.get('url'):
        if json_ld.get('url') not in (url, None):
            self.report_detected('JSON LD')
            if determine_ext(json_ld.get('url')) == 'm3u8':
            if determine_ext(json_ld['url']) == 'm3u8':
                json_ld['formats'], json_ld['subtitles'] = self._extract_m3u8_formats_and_subtitles(
                    json_ld['url'], video_id, 'mp4')
                json_ld.pop('url')
                self._sort_formats(json_ld['formats'])
            else:
                json_ld['_type'] = 'url_transparent'
                json_ld['url'] = smuggle_url(json_ld['url'], {'force_videoid': video_id, 'to_generic': True})
            return merge_dicts(json_ld, info_dict)

        def check_video(vurl):
@@ -3979,12 +3998,16 @@ class GenericIE(InfoExtractor):

        # here's a fun little line of code for you:
        video_id = os.path.splitext(video_id)[0]
        headers = {
            'referer': full_response.geturl()
        }
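        # Editorial note: this referer header is threaded through to the
        # manifest fetchers below (m3u8/mpd/f4m all gain headers=headers), so
        # sites that gate their playlists on the Referer keep working.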

        entry_info_dict = {
            'id': video_id,
            'uploader': video_uploader,
            'title': video_title,
            'age_limit': age_limit,
            'http_headers': headers,
        }

        if RtmpIE.suitable(video_url):
@@ -4002,11 +4025,11 @@ class GenericIE(InfoExtractor):
        elif ext == 'xspf':
            return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
        elif ext == 'm3u8':
            entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4')
            entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4', headers=headers)
        elif ext == 'mpd':
            entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_mpd_formats_and_subtitles(video_url, video_id)
            entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_mpd_formats_and_subtitles(video_url, video_id, headers=headers)
        elif ext == 'f4m':
            entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
            entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id, headers=headers)
        elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
            # Just matching .ism/manifest is not enough to be reliably sure
            # whether it's actually an ISM manifest or some other streaming

@@ -3,22 +3,30 @@ from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    bool_or_none,
    ExtractorError,
    dict_get,
    float_or_none,
    int_or_none,
    remove_end,
    str_or_none,
    traverse_obj,
    try_get,
    url_or_none,
    urljoin,
)


class GettrIE(InfoExtractor):
    _VALID_URL = r'https?://(www\.)?gettr\.com/post/(?P<id>[a-z0-9]+)'
class GettrBaseIE(InfoExtractor):
    _BASE_REGEX = r'https?://(www\.)?gettr\.com/'
    _MEDIA_BASE_URL = 'https://media.gettr.com/'

    def _call_api(self, path, video_id, *args, **kwargs):
        return self._download_json(urljoin('https://api.gettr.com/u/', path), video_id, *args, **kwargs)['result']


class GettrIE(GettrBaseIE):
    _VALID_URL = GettrBaseIE._BASE_REGEX + r'post/(?P<id>[a-z0-9]+)'

    _TESTS = [{
        'url': 'https://www.gettr.com/post/pcf6uv838f',
        'info_dict': {
@@ -28,9 +36,11 @@ class GettrIE(InfoExtractor):
            'ext': 'mp4',
            'uploader': 'EpochTV',
            'uploader_id': 'epochtv',
            'upload_date': '20210927',
            'thumbnail': r're:^https?://.+/out\.jpg',
            'timestamp': 1632782451058,
            'timestamp': 1632782451.058,
            'duration': 58.5585,
            'tags': ['hornofafrica', 'explorations'],
        }
    }, {
        'url': 'https://gettr.com/post/p4iahp',
@@ -41,43 +51,69 @@ class GettrIE(InfoExtractor):
            'ext': 'mp4',
            'uploader': 'Neues Forum Freiheit',
            'uploader_id': 'nf_freiheit',
            'upload_date': '20210718',
            'thumbnail': r're:^https?://.+/out\.jpg',
            'timestamp': 1626594455017,
            'timestamp': 1626594455.017,
            'duration': 23,
            'tags': 'count:12',
        }
    }, {
        # quote post
        'url': 'https://gettr.com/post/pxn5b743a9',
        'only_matching': True,
    }, {
        # quote with video
        'url': 'https://gettr.com/post/pxtiiz5ca2',
        'only_matching': True,
    }, {
        # streaming embed
        'url': 'https://gettr.com/post/pxlu8p3b13',
        'only_matching': True,
    }, {
        # youtube embed
        'url': 'https://gettr.com/post/pv6wp9e24c',
        'only_matching': True,
        'add_ie': ['Youtube'],
    }]

    def _real_extract(self, url):
        post_id = self._match_id(url)
        webpage = self._download_webpage(url, post_id)
        api_data = self._call_api('post/%s?incl="poststats|userinfo"' % post_id, post_id)

        api_data = self._download_json(
            'https://api.gettr.com/u/post/%s?incl="poststats|userinfo"' % post_id, post_id)
        post_data = api_data.get('data')
        user_data = try_get(api_data, lambda x: x['aux']['uinf'][post_data['uid']], dict) or {}

        post_data = try_get(api_data, lambda x: x['result']['data'])
        user_data = try_get(api_data, lambda x: x['result']['aux']['uinf'][post_data['uid']]) or {}
        vid = post_data.get('vid')
        ovid = post_data.get('ovid')

        if post_data.get('nfound'):
            raise ExtractorError(post_data.get('txt'), expected=True)
        if post_data.get('p_type') == 'stream':
            return self.url_result(f'https://gettr.com/streaming/{post_id}', ie='GettrStreaming', video_id=post_id)

        if not (ovid or vid):
            embed_url = url_or_none(post_data.get('prevsrc'))
            shared_post_id = traverse_obj(api_data, ('aux', 'shrdpst', '_id'), ('data', 'rpstIds', 0), expected_type=str)

            if embed_url:
                return self.url_result(embed_url)
            elif shared_post_id:
                return self.url_result(f'https://gettr.com/post/{shared_post_id}', ie='Gettr', video_id=shared_post_id)
            else:
                raise ExtractorError('There\'s no video in this post.')

        title = description = str_or_none(
            post_data.get('txt') or self._og_search_description(webpage))

        uploader = str_or_none(
            user_data.get('nickname')
            or remove_end(self._og_search_title(webpage), ' on GETTR'))
            or self._search_regex(r'^(.+?) on GETTR', self._og_search_title(webpage, default=''), 'uploader', fatal=False))

        if uploader:
            title = '%s - %s' % (uploader, title)

        if not dict_get(post_data, ['vid', 'ovid']):
            raise ExtractorError('There\'s no video in this post.')

        vid = post_data.get('vid')
        ovid = post_data.get('ovid')

        formats = self._extract_m3u8_formats(
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            urljoin(self._MEDIA_BASE_URL, vid), post_id, 'mp4',
            entry_protocol='m3u8_native', m3u8_id='hls') if vid else []
            entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) if vid else ([], {})

        if ovid:
            formats.append({
@@ -86,8 +122,6 @@ class GettrIE(InfoExtractor):
                'ext': 'mp4',
                'width': int_or_none(post_data.get('vid_wid')),
                'height': int_or_none(post_data.get('vid_hgt')),
                'source_preference': 1,
                'quality': 1,
            })

        self._sort_formats(formats)
@@ -96,15 +130,84 @@ class GettrIE(InfoExtractor):
            'id': post_id,
            'title': title,
            'description': description,
            'thumbnail': url_or_none(
                urljoin(self._MEDIA_BASE_URL, post_data.get('main'))
                or self._og_search_thumbnail(webpage)),
            'timestamp': int_or_none(post_data.get('cdate')),
            'formats': formats,
            'subtitles': subtitles,
            'uploader': uploader,
            'uploader_id': str_or_none(
                dict_get(user_data, ['_id', 'username'])
                or post_data.get('uid')),
            'uploader': uploader,
            'formats': formats,
            'thumbnail': url_or_none(
                urljoin(self._MEDIA_BASE_URL, post_data.get('main'))
                or self._html_search_meta(['og:image', 'image'], webpage, 'thumbnail', fatal=False)),
            'timestamp': float_or_none(dict_get(post_data, ['cdate', 'udate']), scale=1000),
            'duration': float_or_none(post_data.get('vid_dur')),
            'tags': post_data.get('htgs'),
        }


class GettrStreamingIE(GettrBaseIE):
    _VALID_URL = GettrBaseIE._BASE_REGEX + r'streaming/(?P<id>[a-z0-9]+)'

    _TESTS = [{
        'url': 'https://gettr.com/streaming/psoiulc122',
        'info_dict': {
            'id': 'psoiulc122',
            'ext': 'mp4',
            'description': 'md5:56bca4b8f48f1743d9fd03d49c723017',
            'view_count': int,
            'uploader': 'Corona Investigative Committee',
            'uploader_id': 'coronacommittee',
            'duration': 5180.184,
            'thumbnail': r're:^https?://.+',
            'title': 'Day 1: Opening Session of the Grand Jury Proceeding',
            'timestamp': 1644080997.164,
            'upload_date': '20220205',
        }
    }, {
        'url': 'https://gettr.com/streaming/psfmeefcc1',
        'info_dict': {
            'id': 'psfmeefcc1',
            'ext': 'mp4',
            'title': 'Session 90: "The Virus Of Power"',
            'view_count': int,
            'uploader_id': 'coronacommittee',
            'description': 'md5:98986acdf656aa836bf36f9c9704c65b',
            'uploader': 'Corona Investigative Committee',
            'thumbnail': r're:^https?://.+',
            'duration': 21872.507,
            'timestamp': 1643976662.858,
            'upload_date': '20220204',
        }
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        video_info = self._call_api('live/join/%s' % video_id, video_id, data={})

        live_info = video_info['broadcast']
        live_url = url_or_none(live_info.get('url'))

        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            live_url, video_id, ext='mp4',
            entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) if live_url else ([], {})

        thumbnails = [{
            'url': urljoin(self._MEDIA_BASE_URL, thumbnail),
        } for thumbnail in try_get(video_info, lambda x: x['postData']['imgs'], list) or []]

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': try_get(video_info, lambda x: x['postData']['ttl'], str),
            'description': try_get(video_info, lambda x: x['postData']['dsc'], str),
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'uploader': try_get(video_info, lambda x: x['liveHostInfo']['nickname'], str),
            'uploader_id': try_get(video_info, lambda x: x['liveHostInfo']['_id'], str),
            'view_count': int_or_none(live_info.get('viewsCount')),
            'timestamp': float_or_none(live_info.get('startAt'), scale=1000),
            'duration': float_or_none(live_info.get('duration'), scale=1000),
            'is_live': bool_or_none(live_info.get('isLive')),
        }

@@ -12,6 +12,7 @@ from ..compat import (
    compat_str,
)
from ..utils import (
    HEADRequest,
    ExtractorError,
    float_or_none,
    orderedSet,
@@ -67,11 +68,28 @@ class GloboIE(InfoExtractor):
    }, {
        'url': 'globo:3607726',
        'only_matching': True,
    }, {
        'url': 'https://globoplay.globo.com/v/10248083/',
        'info_dict': {
            'id': '10248083',
            'ext': 'mp4',
            'title': 'Melhores momentos: Equador 1 x 1 Brasil pelas Eliminatórias da Copa do Mundo 2022',
            'duration': 530.964,
            'uploader': 'SporTV',
            'uploader_id': '698',
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        self._request_webpage(
            HEADRequest('https://globo-ab.globo.com/v2/selected-alternatives?experiments=player-isolated-experiment-02&skipImpressions=true'),
            video_id, 'Getting cookies')

        video = self._download_json(
            'http://api.globovideos.com/videos/%s/playlist' % video_id,
            video_id)['videos'][0]
@@ -82,7 +100,7 @@ class GloboIE(InfoExtractor):

        formats = []
        security = self._download_json(
            'https://playback.video.globo.com/v1/video-session', video_id, 'Downloading security hash for %s' % video_id,
            'https://playback.video.globo.com/v2/video-session', video_id, 'Downloading security hash for %s' % video_id,
            headers={'content-type': 'application/json'}, data=json.dumps({
                "player_type": "desktop",
                "video_id": video_id,
@@ -92,7 +110,9 @@ class GloboIE(InfoExtractor):
                "tz": "-3.0:00"
            }).encode())

        security_hash = security['source']['token']
        self._request_webpage(HEADRequest(security['sources'][0]['url_template']), video_id, 'Getting locksession cookie')

        security_hash = security['sources'][0]['token']
        if not security_hash:
            message = security.get('message')
            if message:
@@ -115,15 +135,15 @@ class GloboIE(InfoExtractor):
        md5_data = (received_md5 + padded_sign_time + '0xAC10FD').encode()
        signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=')
        signed_hash = hash_prefix + padded_sign_time + signed_md5
        source = security['source']['url_parts']
        source = security['sources'][0]['url_parts']
        resource_url = source['scheme'] + '://' + source['domain'] + source['path']
        signed_url = '%s?h=%s&k=html5&a=%s' % (resource_url, signed_hash, 'F' if video.get('subscriber_only') else 'A')
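        # Editorial sketch (structure inferred from the code above): the final
        # URL carries the reassembled signature plus an access flag, roughly
        #   https://<domain>/<path>?h=<hash_prefix><padded_sign_time><signed_md5>&k=html5&a=A
        # where 'a' is 'F' for subscriber-only content and 'A' otherwise.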

        formats.extend(self._extract_m3u8_formats(
            signed_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
        fmts, subtitles = self._extract_m3u8_formats_and_subtitles(
            signed_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
        formats.extend(fmts)
        self._sort_formats(formats)

        subtitles = {}
        for resource in video['resources']:
            if resource.get('type') == 'subtitle':
                subtitles.setdefault(resource.get('language') or 'por', []).append({
@@ -166,6 +186,7 @@ class GloboArticleIE(InfoExtractor):
        r'\bvideosIDs\s*:\s*["\']?(\d{7,})',
        r'\bdata-id=["\'](\d{7,})',
        r'<div[^>]+\bid=["\'](\d{7,})',
        r'<bs-player[^>]+\bvideoid=["\'](\d{8,})',
    ]

    _TESTS = [{
@@ -193,6 +214,14 @@ class GloboArticleIE(InfoExtractor):
    }, {
        'url': 'http://oglobo.globo.com/rio/a-amizade-entre-um-entregador-de-farmacia-um-piano-19946271',
        'only_matching': True,
    }, {
        'url': 'https://ge.globo.com/video/ta-na-area-como-foi-assistir-ao-jogo-do-palmeiras-que-a-globo-nao-passou-10287094.ghtml',
        'info_dict': {
            'id': 'ta-na-area-como-foi-assistir-ao-jogo-do-palmeiras-que-a-globo-nao-passou-10287094',
            'title': 'Tá na Área: como foi assistir ao jogo do Palmeiras que a Globo não passou',
            'description': 'md5:2d089d036c4c9675117d3a56f8c61739',
        },
        'playlist_count': 1,
    }]

    @classmethod
@@ -208,6 +237,6 @@ class GloboArticleIE(InfoExtractor):
        entries = [
            self.url_result('globo:%s' % video_id, GloboIE.ie_key())
            for video_id in orderedSet(video_ids)]
        title = self._og_search_title(webpage, fatal=False)
        title = self._og_search_title(webpage)
        description = self._html_search_meta('description', webpage)
        return self.playlist_result(entries, display_id, title, description)

@@ -7,6 +7,7 @@ import urllib.parse
from .common import InfoExtractor
from ..utils import (
    determine_ext,
    extract_attributes,
    ExtractorError,
    int_or_none,
    parse_qs,
@@ -177,44 +178,38 @@ class GlomexEmbedIE(GlomexBaseIE):

    @classmethod
    def _extract_urls(cls, webpage, origin_url):
        VALID_SRC = rf'(?:https?:)?{cls._BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+'

        # https://docs.glomex.com/publisher/video-player-integration/javascript-api/
        EMBED_RE = r'''(?x)(?:
            <iframe[^>]+?src=(?P<_q1>%(quot_re)s)(?P<url>%(url_re)s)(?P=_q1)|
            <(?P<html_tag>glomex-player|div)(?:
                data-integration-id=(?P<_q2>%(quot_re)s)(?P<integration_html>(?:(?!(?P=_q2)).)+)(?P=_q2)|
                data-playlist-id=(?P<_q3>%(quot_re)s)(?P<id_html>(?:(?!(?P=_q3)).)+)(?P=_q3)|
                data-glomex-player=(?P<_q4>%(quot_re)s)(?P<glomex_player>true)(?P=_q4)|
                [^>]*?
            )+>|
            # naive parsing of inline scripts for hard-coded integration parameters
            <(?P<script_tag>script)[^<]*?>(?:
                (?P<_stjs1>dataset\.)?integrationId\s*(?(_stjs1)=|:)\s*
                (?P<_q5>%(quot_re)s)(?P<integration_js>(?:(?!(?P=_q5)).)+)(?P=_q5)\s*(?(_stjs1);|,)?|
                (?P<_stjs2>dataset\.)?playlistId\s*(?(_stjs2)=|:)\s*
                (?P<_q6>%(quot_re)s)(?P<id_js>(?:(?!(?P=_q6)).)+)(?P=_q6)\s*(?(_stjs2);|,)?|
                (?:\s|.)*?
            )+</script>
        )''' % {'quot_re': r'["\']', 'url_re': VALID_SRC}
        quot_re = r'["\']'

        for mobj in re.finditer(EMBED_RE, webpage):
            mdict = mobj.groupdict()
            if mdict.get('url'):
                url = unescapeHTML(mdict['url'])
                if not cls.suitable(url):
                    continue
        regex = fr'''(?x)
            <iframe[^>]+?src=(?P<q>{quot_re})(?P<url>
                (?:https?:)?{cls._BASE_PLAYER_URL_RE}\?(?:(?!(?P=q)).)+
            )(?P=q)'''
        for mobj in re.finditer(regex, webpage):
            url = unescapeHTML(mobj.group('url'))
            if cls.suitable(url):
                yield cls._smuggle_origin_url(url, origin_url)
            elif mdict.get('html_tag'):
                if mdict['html_tag'] == 'div' and not mdict.get('glomex_player'):
                    continue
                if not mdict.get('video_id_html') or not mdict.get('integration_html'):
                    continue
                yield cls.build_player_url(mdict['video_id_html'], mdict['integration_html'], origin_url)
            elif mdict.get('script_tag'):
                if not mdict.get('video_id_js') or not mdict.get('integration_js'):
                    continue
                yield cls.build_player_url(mdict['video_id_js'], mdict['integration_js'], origin_url)

        regex = fr'''(?x)
            <glomex-player [^>]+?>|
            <div[^>]* data-glomex-player=(?P<q>{quot_re})true(?P=q)[^>]*>'''
        for mobj in re.finditer(regex, webpage):
            attrs = extract_attributes(mobj.group(0))
            if attrs.get('data-integration-id') and attrs.get('data-playlist-id'):
                yield cls.build_player_url(attrs['data-playlist-id'], attrs['data-integration-id'], origin_url)

        # naive parsing of inline scripts for hard-coded integration parameters
        regex = fr'''(?x)
            (?P<is_js>dataset\.)?%s\s*(?(is_js)=|:)\s*
            (?P<q>{quot_re})(?P<id>(?:(?!(?P=q)).)+)(?P=q)\s'''
        for mobj in re.finditer(r'(?x)<script[^<]*>.+?</script>', webpage):
            script = mobj.group(0)
            integration_id = re.search(regex % 'integrationId', script)
            if not integration_id:
                continue
            playlist_id = re.search(regex % 'playlistId', script)
            if playlist_id:
                yield cls.build_player_url(playlist_id, integration_id, origin_url)
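        # Editorial note: the refactor above replaces the single monolithic
        # EMBED_RE with three focused passes: iframe src URLs, <glomex-player>/
        # data-glomex-player tags parsed via extract_attributes(), and a naive
        # scan of inline <script> bodies for hard-coded integration parameters.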

    def _real_extract(self, url):
        url, origin_url = self._unsmuggle_origin_url(url)

@@ -8,36 +8,33 @@ from .common import SearchInfoExtractor

class GoogleSearchIE(SearchInfoExtractor):
    IE_DESC = 'Google Video search'
    _MAX_RESULTS = 1000
    IE_NAME = 'video.google:search'
    _SEARCH_KEY = 'gvsearch'
    _WORKING = False
    _TEST = {
    _TESTS = [{
        'url': 'gvsearch15:python language',
        'info_dict': {
            'id': 'python language',
            'title': 'python language',
        },
        'playlist_count': 15,
    }
    }]
    _PAGE_SIZE = 100

    def _search_results(self, query):
        for pagenum in itertools.count():
            webpage = self._download_webpage(
                'http://www.google.com/search',
                'gvsearch:' + query,
                note='Downloading result page %s' % (pagenum + 1),
                'http://www.google.com/search', f'gvsearch:{query}',
                note=f'Downloading result page {pagenum + 1}',
                query={
                    'tbm': 'vid',
                    'q': query,
                    'start': pagenum * 10,
                    'start': pagenum * self._PAGE_SIZE,
                    'num': self._PAGE_SIZE,
                    'hl': 'en',
                })

            for hit_idx, mobj in enumerate(re.finditer(
                    r'<h3 class="r"><a href="([^"]+)"', webpage)):
                if re.search(f'id="vidthumb{hit_idx + 1}"', webpage):
                    yield self.url_result(mobj.group(1))
            for url in re.findall(r'<div[^>]* class="dXiKIc"[^>]*><a href="([^"]+)"', webpage):
                yield self.url_result(url)

            if not re.search(r'id="pnnext"', webpage):
                return
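            # Usage note (editorial): search extractors are driven by their
            # _SEARCH_KEY, so e.g. `yt-dlp "gvsearch15:python language"`
            # yields the first 15 results and `gvsearchall:` is capped by
            # _MAX_RESULTS; with _PAGE_SIZE = 100 each page request now
            # fetches up to 100 hits instead of 10.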

@@ -1,49 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    parse_duration,
)


class HornBunnyIE(InfoExtractor):
    _VALID_URL = r'http?://(?:www\.)?hornbunny\.com/videos/(?P<title_dash>[a-z-]+)-(?P<id>\d+)\.html'
    _TEST = {
        'url': 'http://hornbunny.com/videos/panty-slut-jerk-off-instruction-5227.html',
        'md5': 'e20fd862d1894b67564c96f180f43924',
        'info_dict': {
            'id': '5227',
            'ext': 'mp4',
            'title': 'panty slut jerk off instruction',
            'duration': 550,
            'age_limit': 18,
            'view_count': int,
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage)
        info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0]

        duration = parse_duration(self._search_regex(
            r'<strong>Runtime:</strong>\s*([0-9:]+)</div>',
            webpage, 'duration', fatal=False))
        view_count = int_or_none(self._search_regex(
            r'<strong>Views:</strong>\s*(\d+)</div>',
            webpage, 'view count', fatal=False))

        info_dict.update({
            'id': video_id,
            'title': title,
            'duration': duration,
            'view_count': view_count,
            'age_limit': 18,
        })

        return info_dict
@@ -80,9 +80,6 @@ class HuffPostIE(InfoExtractor):
                'vcodec': 'none' if key.startswith('audio/') else None,
            })

        if not formats and data.get('fivemin_id'):
            return self.url_result('5min:%s' % data['fivemin_id'])

        self._sort_formats(formats)

        return {

@@ -64,10 +64,7 @@ class ImgGamingBaseIE(InfoExtractor):
        domain, media_type, media_id, playlist_id = self._match_valid_url(url).groups()

        if playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % media_id)
            else:
                self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id)
            if self._yes_playlist(playlist_id, media_id):
                media_type, media_id = 'playlist', playlist_id
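            # Editorial note: _yes_playlist() folds the old --no-playlist
            # branching (including both to_screen messages) into one shared
            # helper, so the behaviour is unchanged but the messaging now
            # lives in the base extractor.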

        if media_type == 'playlist':
@@ -99,7 +96,7 @@ class ImgGamingBaseIE(InfoExtractor):
                continue
            if proto == 'hls':
                m3u8_formats = self._extract_m3u8_formats(
                    media_url, media_id, 'mp4', 'm3u8' if is_live else 'm3u8_native',
                    media_url, media_id, 'mp4', live=is_live,
                    m3u8_id='hls', fatal=False, headers=self._MANIFEST_HEADERS)
                for f in m3u8_formats:
                    f.setdefault('http_headers', {}).update(self._MANIFEST_HEADERS)

@@ -17,7 +17,7 @@ from ..utils import (
    get_element_by_attribute,
    int_or_none,
    lowercase_escape,
    std_headers,
    str_or_none,
    str_to_int,
    traverse_obj,
    url_or_none,
@@ -127,6 +127,74 @@ class InstagramBaseIE(InfoExtractor):
            'like_count': self._get_count(node, 'likes', 'preview_like'),
        }

    def _extract_product_media(self, product_media):
        media_id = product_media.get('code') or product_media.get('id')
        vcodec = product_media.get('video_codec')
        dash_manifest_raw = product_media.get('video_dash_manifest')
        videos_list = product_media.get('video_versions')
        if not (dash_manifest_raw or videos_list):
            return {}

        formats = [{
            'format_id': format.get('id'),
            'url': format.get('url'),
            'width': format.get('width'),
            'height': format.get('height'),
            'vcodec': vcodec,
        } for format in videos_list or []]
        if dash_manifest_raw:
            formats.extend(self._parse_mpd_formats(self._parse_xml(dash_manifest_raw, media_id), mpd_id='dash'))
        self._sort_formats(formats)

        thumbnails = [{
            'url': thumbnail.get('url'),
            'width': thumbnail.get('width'),
            'height': thumbnail.get('height')
        } for thumbnail in traverse_obj(product_media, ('image_versions2', 'candidates')) or []]
        return {
            'id': media_id,
            'duration': float_or_none(product_media.get('video_duration')),
            'formats': formats,
            'thumbnails': thumbnails
        }

    def _extract_product(self, product_info):
        if isinstance(product_info, list):
            product_info = product_info[0]

        user_info = product_info.get('user') or {}
        info_dict = {
            'id': product_info.get('code') or product_info.get('id'),
            'title': product_info.get('title') or f'Video by {user_info.get("username")}',
            'description': traverse_obj(product_info, ('caption', 'text'), expected_type=str_or_none),
            'timestamp': int_or_none(product_info.get('taken_at')),
            'channel': user_info.get('username'),
            'uploader': user_info.get('full_name'),
            'uploader_id': str_or_none(user_info.get('pk')),
            'view_count': int_or_none(product_info.get('view_count')),
            'like_count': int_or_none(product_info.get('like_count')),
            'comment_count': int_or_none(product_info.get('comment_count')),
            'http_headers': {
                'Referer': 'https://www.instagram.com/',
            }
        }
        carousel_media = product_info.get('carousel_media')
        if carousel_media:
            return {
                '_type': 'playlist',
                **info_dict,
                'title': f'Post by {user_info.get("username")}',
                'entries': [{
                    **info_dict,
                    **self._extract_product_media(product_media),
                } for product_media in carousel_media],
            }

        return {
            **info_dict,
            **self._extract_product_media(product_info)
        }
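    # Editorial sketch (hypothetical data): a two-item carousel therefore
    # resolves to a playlist whose entries merge the post-level metadata with
    # each medium's own formats, roughly:
    #   {'_type': 'playlist', 'title': 'Post by someuser',
    #    'entries': [{'id': 'Cabc...', 'formats': [...], 'channel': 'someuser', ...},
    #                {'id': 'Cdef...', 'formats': [...], 'channel': 'someuser', ...}]}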


class InstagramIOSIE(InfoExtractor):
    IE_DESC = 'IOS instagram:// URL'
@@ -185,8 +253,9 @@ class InstagramIE(InstagramBaseIE):
            'duration': 0,
            'timestamp': 1371748545,
            'upload_date': '20130620',
            'uploader_id': 'naomipq',
            'uploader_id': '2815873',
            'uploader': 'B E A U T Y F O R A S H E S',
            'channel': 'naomipq',
            'like_count': int,
            'comment_count': int,
            'comments': list,
@@ -202,8 +271,9 @@ class InstagramIE(InstagramBaseIE):
            'duration': 0,
            'timestamp': 1453760977,
            'upload_date': '20160125',
            'uploader_id': 'britneyspears',
            'uploader_id': '12246775',
            'uploader': 'Britney Spears',
            'channel': 'britneyspears',
            'like_count': int,
            'comment_count': int,
            'comments': list,
@@ -249,8 +319,9 @@ class InstagramIE(InstagramBaseIE):
            'duration': 53.83,
            'timestamp': 1530032919,
            'upload_date': '20180626',
            'uploader_id': 'instagram',
            'uploader_id': '25025320',
            'uploader': 'Instagram',
            'channel': 'instagram',
            'like_count': int,
            'comment_count': int,
            'comments': list,
@@ -316,16 +387,19 @@ class InstagramIE(InstagramBaseIE):
        if not media:
            additional_data = self._parse_json(
                self._search_regex(
                    r'window\.__additionalDataLoaded\s*\(\s*[^,]+,\s*({.+?})\s*\)\s*;',
                    r'window\.__additionalDataLoaded\s*\(\s*[^,]+,\s*({.+?})\s*\);',
                    webpage, 'additional data', default='{}'),
                video_id, fatal=False)
            product_item = traverse_obj(additional_data, ('items', 0), expected_type=dict)
            if product_item:
                return self._extract_product(product_item)
            media = traverse_obj(additional_data, ('graphql', 'shortcode_media'), 'shortcode_media', expected_type=dict) or {}

        if not media and 'www.instagram.com/accounts/login' in urlh.geturl():
            self.raise_login_required('You need to log in to access this content')

        uploader_id = traverse_obj(media, ('owner', 'username')) or self._search_regex(
            r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"', webpage, 'uploader id', fatal=False)
        username = traverse_obj(media, ('owner', 'username')) or self._search_regex(
            r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"', webpage, 'username', fatal=False)

        description = (
            traverse_obj(media, ('edge_media_to_caption', 'edges', 0, 'node', 'text'), expected_type=str)
@@ -342,7 +416,7 @@ class InstagramIE(InstagramBaseIE):
        if nodes:
            return self.playlist_result(
                self._extract_nodes(nodes, True), video_id,
                format_field(uploader_id, template='Post by %s'), description)
                format_field(username, template='Post by %s'), description)

        video_url = self._og_search_video_url(webpage, secure=False)

@@ -378,12 +452,13 @@ class InstagramIE(InstagramBaseIE):
        return {
            'id': video_id,
            'formats': formats,
            'title': media.get('title') or 'Video by %s' % uploader_id,
            'title': media.get('title') or 'Video by %s' % username,
            'description': description,
            'duration': float_or_none(media.get('video_duration')),
            'timestamp': traverse_obj(media, 'taken_at_timestamp', 'date', expected_type=int_or_none),
            'uploader_id': uploader_id,
            'uploader_id': traverse_obj(media, ('owner', 'id')),
            'uploader': traverse_obj(media, ('owner', 'full_name')),
            'channel': username,
            'like_count': self._get_count(media, 'likes', 'preview_like') or str_to_int(self._search_regex(
                r'data-log-event="likeCountClick"[^>]*>[^\d]*([\d,\.]+)', webpage, 'like count', fatal=False)),
            'comment_count': self._get_count(media, 'comments', 'preview_comment', 'to_comment', 'to_parent_comment'),
@@ -427,7 +502,7 @@ class InstagramPlaylistBaseIE(InstagramBaseIE):
            '%s' % rhx_gis,
            '',
            '%s:%s' % (rhx_gis, csrf_token),
            '%s:%s:%s' % (rhx_gis, csrf_token, std_headers['User-Agent']),
            '%s:%s:%s' % (rhx_gis, csrf_token, self.get_param('http_headers')['User-Agent']),
        ]

        # try all of the ways to generate a GIS query, and not only use the
@@ -578,7 +653,6 @@ class InstagramStoryIE(InstagramBaseIE):
            'X-ASBD-ID': 198387,
            'X-IG-WWW-Claim': 0,
        })['reels']
        entites = []

        full_name = traverse_obj(videos, ('user', 'full_name'))

@@ -592,41 +666,10 @@ class InstagramStoryIE(InstagramBaseIE):
            username = traverse_obj(user_info, ('user', 'username')) or username
            full_name = traverse_obj(user_info, ('user', 'full_name')) or full_name

        videos = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (str(user_id), 'items'))
        for video_info in videos:
            formats = []
            if isinstance(video_info, list):
                video_info = video_info[0]
            vcodec = video_info.get('video_codec')
            dash_manifest_raw = video_info.get('video_dash_manifest')
            videos_list = video_info.get('video_versions')
            if not (dash_manifest_raw or videos_list):
                continue
            for format in videos_list:
                formats.append({
                    'url': format.get('url'),
                    'width': format.get('width'),
                    'height': format.get('height'),
                    'vcodec': vcodec,
                })
            if dash_manifest_raw:
                formats.extend(self._parse_mpd_formats(self._parse_xml(dash_manifest_raw, story_id), mpd_id='dash'))
            self._sort_formats(formats)
            thumbnails = [{
                'url': thumbnail.get('url'),
                'width': thumbnail.get('width'),
                'height': thumbnail.get('height')
            } for thumbnail in traverse_obj(video_info, ('image_versions2', 'candidates')) or []]
            entites.append({
                'id': video_info.get('id'),
                'title': f'Story by {username}',
                'timestamp': int_or_none(video_info.get('taken_at')),
                'channel': username,
                'uploader': full_name,
                'duration': float_or_none(video_info.get('video_duration')),
                'uploader_id': user_id,
                'thumbnails': thumbnails,
                'formats': formats,
            })

        return self.playlist_result(entites, playlist_id=story_id, playlist_title=highlight_title)
        highlights = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (str(user_id), 'items'))
        return self.playlist_result([{
            **self._extract_product(highlight),
            'title': f'Story by {username}',
            'uploader': full_name,
            'uploader_id': user_id,
        } for highlight in highlights], playlist_id=story_id, playlist_title=highlight_title)

@@ -10,6 +10,7 @@ from .common import InfoExtractor
 from ..compat import (
     compat_str,
     compat_urllib_parse_urlencode,
+    compat_urllib_parse_unquote
 )
 from .openload import PhantomJSwrapper
 from ..utils import (
@@ -17,6 +18,7 @@ from ..utils import (
     decode_packed_codes,
     ExtractorError,
     float_or_none,
+    format_field,
     get_element_by_id,
     get_element_by_attribute,
     int_or_none,
@@ -431,6 +433,10 @@ class IqIE(InfoExtractor):
             'format': '500',
         },
         'expected_warnings': ['format is restricted']
+    }, {
+        # VIP-restricted video
+        'url': 'https://www.iq.com/play/mermaid-in-the-fog-2021-gbdpx13bs4',
+        'only_matching': True
     }]
     _BID_TAGS = {
         '100': '240P',
@@ -457,8 +463,8 @@ class IqIE(InfoExtractor):
     _DASH_JS = '''
     console.log(page.evaluate(function() {
         var tvid = "%(tvid)s"; var vid = "%(vid)s"; var src = "%(src)s";
-        var dfp = "%(dfp)s"; var mode = "%(mode)s"; var lang = "%(lang)s"; var bid_list = %(bid_list)s;
-        var tm = new Date().getTime();
+        var uid = "%(uid)s"; var dfp = "%(dfp)s"; var mode = "%(mode)s"; var lang = "%(lang)s";
+        var bid_list = %(bid_list)s; var ut_list = %(ut_list)s; var tm = new Date().getTime();
         var cmd5x_func = %(cmd5x_func)s; var cmd5x_exporter = {}; cmd5x_func({}, cmd5x_exporter, {}); var cmd5x = cmd5x_exporter.cmd5x;
         var authKey = cmd5x(cmd5x('') + tm + '' + tvid);
         var k_uid = Array.apply(null, Array(32)).map(function() {return Math.floor(Math.random() * 15).toString(16)}).join('');
@@ -472,7 +478,7 @@ class IqIE(InfoExtractor):
             'src': src,
             'vt': 0,
             'rs': 1,
-            'uid': 0,
+            'uid': uid,
             'ori': 'pcw',
             'ps': 1,
             'k_uid': k_uid,
@@ -509,12 +515,14 @@ class IqIE(InfoExtractor):
                 'version': '10.0',
                 'dfp': dfp
             }),
-            'ut': 0, // TODO: Set ut param for VIP members
         };
         var enc_params = [];
         for (var prop in query) {
             enc_params.push(encodeURIComponent(prop) + '=' + encodeURIComponent(query[prop]));
         }
+        ut_list.forEach(function(ut) {
+            enc_params.push('ut=' + ut);
+        })
         var dash_path = '/dash?' + enc_params.join('&'); dash_path += '&vf=' + cmd5x(dash_path);
         dash_paths[bid] = dash_path;
     });
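
This hunk is the core of the DASH URL signing: the query is serialized by hand, every `ut` value is appended as a repeated parameter, and a `vf` checksum computed over the whole path is appended last, so it covers everything before it. A rough Python equivalent of what the JS does; `cmd5x` here is a stand-in default (the real function is deobfuscated from the page at runtime and is not plain MD5):

import hashlib
from urllib.parse import quote


def build_dash_path(query, ut_list, cmd5x=lambda s: hashlib.md5(s.encode()).hexdigest()):
    # Serialize in insertion order, mirroring the JS for..in loop
    enc_params = ['%s=%s' % (quote(str(k)), quote(str(v))) for k, v in query.items()]
    # 'ut' is repeated once per VIP type rather than joined into one value
    enc_params.extend('ut=%s' % ut for ut in ut_list)
    path = '/dash?' + '&'.join(enc_params)
    # The signature must come last: it is computed over the unsigned path
    return path + '&vf=' + cmd5x(path)
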
@@ -571,17 +579,37 @@ class IqIE(InfoExtractor):
         page_data = next_props['initialState']['play']
         video_info = page_data['curVideoInfo']
 
+        uid = traverse_obj(
+            self._parse_json(
+                self._get_cookie('I00002', '{}'), video_id, transform_source=compat_urllib_parse_unquote, fatal=False),
+            ('data', 'uid'), default=0)
+
+        if uid:
+            vip_data = self._download_json(
+                'https://pcw-api.iq.com/api/vtype', video_id, note='Downloading VIP data', errnote='Unable to download VIP data', query={
+                    'batch': 1,
+                    'platformId': 3,
+                    'modeCode': self._get_cookie('mod', 'intl'),
+                    'langCode': self._get_cookie('lang', 'en_us'),
+                    'deviceId': self._get_cookie('QC005', '')
+                }, fatal=False)
+            ut_list = traverse_obj(vip_data, ('data', 'all_vip', ..., 'vipType'), expected_type=str_or_none, default=[])
+        else:
+            ut_list = ['0']
+
         # bid 0 as an initial format checker
         dash_paths = self._parse_json(PhantomJSwrapper(self).get(
             url, html='<!DOCTYPE html>', video_id=video_id, note2='Executing signature code', jscode=self._DASH_JS % {
                 'tvid': video_info['tvId'],
                 'vid': video_info['vid'],
                 'src': traverse_obj(next_props, ('initialProps', 'pageProps', 'ptid'),
-                                    expected_type=str, default='01010031010018000000'),
+                                    expected_type=str, default='04022001010011000000'),
+                'uid': uid,
                 'dfp': self._get_cookie('dfp', ''),
                 'mode': self._get_cookie('mod', 'intl'),
                 'lang': self._get_cookie('lang', 'en_us'),
                 'bid_list': '[' + ','.join(['0', *self._BID_TAGS.keys()]) + ']',
+                'ut_list': '[' + ','.join(ut_list) + ']',
                 'cmd5x_func': self._extract_cmd5x_function(webpage, video_id),
             })[1].strip(), video_id)
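
The `uid` above is read out of the `I00002` cookie, which stores URL-encoded JSON. A minimal sketch of the same decoding chain using only the standard library (the cookie value is fabricated):

import json
from urllib.parse import unquote

cookie = '%7B%22data%22%3A%20%7B%22uid%22%3A%2042%7D%7D'  # '{"data": {"uid": 42}}'
uid = json.loads(unquote(cookie)).get('data', {}).get('uid', 0)
assert uid == 42
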
@@ -590,9 +618,10 @@ class IqIE(InfoExtractor):
             urljoin('https://cache-video.iq.com', dash_paths['0']), video_id,
             note='Downloading initial video format info', errnote='Unable to download initial video format info')['data']
 
-        preview_time = traverse_obj(initial_format_data, ('boss_ts', 'data', 'previewTime'), expected_type=float_or_none)
-        if preview_time:
-            self.report_warning(f'This preview video is limited to {preview_time} seconds')
+        preview_time = traverse_obj(
+            initial_format_data, ('boss_ts', (None, 'data'), ('previewTime', 'rtime')), expected_type=float_or_none, get_all=False)
+        if traverse_obj(initial_format_data, ('boss_ts', 'data', 'prv'), expected_type=int_or_none):
+            self.report_warning('This preview video is limited%s' % format_field(preview_time, template=' to %s seconds'))
 
         # TODO: Extract audio-only formats
         for bid in set(traverse_obj(initial_format_data, ('program', 'video', ..., 'bid'), expected_type=str_or_none, default=[])):
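
The rewritten warning uses `format_field`, which interpolates a value into a template but collapses to an empty string when the value is missing, so the message reads correctly whether or not a preview length was found. A quick illustration, assuming the keyword signature from this era of yt-dlp:

from yt_dlp.utils import format_field

assert format_field(42.0, template=' to %s seconds') == ' to 42.0 seconds'
assert format_field(None, template=' to %s seconds') == ''
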
@@ -1,42 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import remove_start
-
-
-class Ir90TvIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?90tv\.ir/video/(?P<id>[0-9]+)/.*'
-    _TESTS = [{
-        'url': 'http://90tv.ir/video/95719/%D8%B4%D8%A7%DB%8C%D8%B9%D8%A7%D8%AA-%D9%86%D9%82%D9%84-%D9%88-%D8%A7%D9%86%D8%AA%D9%82%D8%A7%D9%84%D8%A7%D8%AA-%D9%85%D9%87%D9%85-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7-940218',
-        'md5': '411dbd94891381960cb9e13daa47a869',
-        'info_dict': {
-            'id': '95719',
-            'ext': 'mp4',
-            'title': 'شایعات نقل و انتقالات مهم فوتبال اروپا 94/02/18',
-            'thumbnail': r're:^https?://.*\.jpg$',
-        }
-    }, {
-        'url': 'http://www.90tv.ir/video/95719/%D8%B4%D8%A7%DB%8C%D8%B9%D8%A7%D8%AA-%D9%86%D9%82%D9%84-%D9%88-%D8%A7%D9%86%D8%AA%D9%82%D8%A7%D9%84%D8%A7%D8%AA-%D9%85%D9%87%D9%85-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7-940218',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-
-        title = remove_start(self._html_search_regex(
-            r'<title>([^<]+)</title>', webpage, 'title'), '90tv.ir :: ')
-
-        video_url = self._search_regex(
-            r'<source[^>]+src="([^"]+)"', webpage, 'video url')
-
-        thumbnail = self._search_regex(r'poster="([^"]+)"', webpage, 'thumbnail url', fatal=False)
-
-        return {
-            'url': video_url,
-            'id': video_id,
-            'title': title,
-            'video_url': video_url,
-            'thumbnail': thumbnail,
-        }
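
For reference, the only title cleanup in the removed extractor was `remove_start`, which strips a prefix only when it is actually present:

from yt_dlp.utils import remove_start

assert remove_start('90tv.ir :: Some title', '90tv.ir :: ') == 'Some title'
assert remove_start('Some title', '90tv.ir :: ') == 'Some title'
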
@@ -301,6 +301,7 @@ class KalturaIE(InfoExtractor):
         data_url = re.sub(r'/flvclipper/.*', '/serveFlavor', data_url)
 
         formats = []
+        subtitles = {}
         for f in flavor_assets:
             # Continue if asset is not ready
             if f.get('status') != 2:
@@ -344,13 +345,14 @@
         if '/playManifest/' in data_url:
             m3u8_url = sign_url(data_url.replace(
                 'format/url', 'format/applehttp'))
-            formats.extend(self._extract_m3u8_formats(
+            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                 m3u8_url, entry_id, 'mp4', 'm3u8_native',
-                m3u8_id='hls', fatal=False))
+                m3u8_id='hls', fatal=False)
+            formats.extend(fmts)
+            self._merge_subtitles(subs, target=subtitles)
 
         self._sort_formats(formats)
 
-        subtitles = {}
         if captions:
             for caption in captions.get('objects', []):
                 # Continue if caption is not ready
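
Switching to `_extract_m3u8_formats_and_subtitles` means subtitle renditions declared in the HLS master playlist are no longer thrown away; `_merge_subtitles` folds each returned dict into the running per-language mapping. A toy illustration of the merge semantics, assuming the classmethod behaves as in yt-dlp of this period (URLs are made up):

from yt_dlp.extractor.common import InfoExtractor

target = {'en': [{'url': 'https://example.com/a.vtt', 'ext': 'vtt'}]}
new = {
    'en': [{'url': 'https://example.com/b.vtt', 'ext': 'vtt'}],
    'de': [{'url': 'https://example.com/c.vtt', 'ext': 'vtt'}],
}
InfoExtractor._merge_subtitles(new, target=target)
assert len(target['en']) == 2 and 'de' in target
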
@@ -1,48 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-import hashlib
-
-from .common import InfoExtractor
-
-_md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
-
-
-class KankanIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml'
-
-    _TEST = {
-        'url': 'http://yinyue.kankan.com/vod/48/48863.shtml',
-        'md5': '29aca1e47ae68fc28804aca89f29507e',
-        'info_dict': {
-            'id': '48863',
-            'ext': 'flv',
-            'title': 'Ready To Go',
-        },
-        'skip': 'Only available from China',
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-
-        title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, 'video title')
-        surls = re.search(r'surls:\[\'.+?\'\]|lurl:\'.+?\.flv\'', webpage).group(0)
-        gcids = re.findall(r'http://.+?/.+?/(.+?)/', surls)
-        gcid = gcids[-1]
-
-        info_url = 'http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid
-        video_info_page = self._download_webpage(
-            info_url, video_id, 'Downloading video url info')
-        ip = self._search_regex(r'ip:"(.+?)"', video_info_page, 'video url ip')
-        path = self._search_regex(r'path:"(.+?)"', video_info_page, 'video url path')
-        param1 = self._search_regex(r'param1:(\d+)', video_info_page, 'param1')
-        param2 = self._search_regex(r'param2:(\d+)', video_info_page, 'param2')
-        key = _md5('xl_mp43651' + param1 + param2)
-        video_url = 'http://%s%s?key=%s&key1=%s' % (ip, path, key, param2)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'url': video_url,
-        }
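
The removed Kankan extractor documents a simple CDN signing scheme: the key is the MD5 of a fixed salt concatenated with two parameters returned by the resource API, and `param2` is reused verbatim as `key1`. A standalone sketch with fabricated parameter values:

import hashlib

param1, param2 = '123', '456'  # fabricated; scraped from the info page in the real code
ip, path = '1.2.3.4', '/resource/video.flv'  # likewise fabricated

key = hashlib.md5(('xl_mp43651' + param1 + param2).encode('utf-8')).hexdigest()
video_url = 'http://%s%s?key=%s&key1=%s' % (ip, path, key, param2)
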
@@ -17,6 +17,7 @@ from ..utils import (
     parse_qs,
     OnDemandPagedList,
     try_get,
+    UnsupportedError,
     urljoin,
 )
 
@@ -196,11 +197,11 @@ class LBRYIE(LBRYBaseIE):
             live_data = self._download_json(
                 f'https://api.live.odysee.com/v1/odysee/live/{claim_id}', claim_id,
                 note='Downloading livestream JSON metadata')['data']
-            if not live_data['live']:
-                raise ExtractorError('This stream is not live', expected=True)
-            streaming_url = final_url = live_data['url']
+            streaming_url = final_url = live_data.get('url')
+            if not final_url and not live_data.get('live'):
+                self.raise_no_formats('This stream is not live', True, claim_id)
         else:
-            raise ExtractorError('Unsupported URL', expected=True)
+            raise UnsupportedError(url)
 
         info = self._parse_stream(result, url)
         if determine_ext(final_url) == 'm3u8':
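
Two error-handling changes here: an offline stream now goes through `raise_no_formats` (so metadata extraction can still proceed under --ignore-no-formats-error), and the generic fallback raises `UnsupportedError`, which carries the URL and renders the standard 'Unsupported URL' message. A small check of the latter:

from yt_dlp.utils import UnsupportedError

try:
    raise UnsupportedError('https://odysee.com/some-unhandled-path')
except UnsupportedError as err:
    assert 'Unsupported URL' in err.msg
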