Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2025-08-15 08:58:28 +00:00)

commit d1e23c0852: Merge 35bba89cdb into 38c2bf4026

@@ -2309,6 +2309,7 @@
     UstudioIE,
 )
 from .utreon import UtreonIE
+from .vanillo import VanilloIE, VanilloPlaylistIE, VanilloUserIE
 from .varzesh3 import Varzesh3IE
 from .vbox7 import Vbox7IE
 from .veo import VeoIE

yt_dlp/extractor/vanillo.py (new file, 386 lines)
@@ -0,0 +1,386 @@
import datetime
import json
import re

from .common import InfoExtractor
from ..utils import ExtractorError, parse_iso8601


# NOTE: Private videos can be downloaded by adding --add-header "authorization: Bearer abcxyz",
# but won't work with --cookies-from-browser and --cookies file.txt
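# A minimal example of the header approach described above (the token value
# shown is hypothetical; a real one can be copied from the browser's devtools):
#   yt-dlp --add-header "authorization: Bearer eyJhbGciOi..." "https://vanillo.tv/v/iaCi-oTmmGY"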


class VanilloIE(InfoExtractor):
    _access_token = None

    def _real_initialize(self):
        """
        Called before extracting each URL. This checks the cookie jar for an access token
        """
        # 1) Check if we already have an access token in our cookies
        if not self._access_token:
            self._access_token = self._get_token_from_cookies()
        # If we still have no token, requests remain unauthorized (public only).

    def _get_token_from_cookies(self):
        """
        Attempt to read an access_token from cookies (if any).
        """
        cookie_names = ('access_token', 'ACCESS_TOKEN')
        # We rename 'path' to '_path' since it's unused in the loop body
        for domain, path_dict in self._downloader.cookiejar._cookies.items():
            if 'vanillo.tv' in domain:
                for _path, cookie_dict in path_dict.items():
                    for name, cookie in cookie_dict.items():
                        if name in cookie_names:
                            # self.to_screen(f'Found vanillo.tv access_token in cookies: {cookie.value}')
                            self.to_screen('Found vanillo.tv access_token in cookies')
                            return cookie.value
        return None

    def _download_json(self, url_or_request, video_id, note='Downloading JSON', *args, **kwargs):
        """
        Overriding calls to _download_json so that our Authorization
        header is always set if we have an access token.
        """
        # We call _download_json_handle, passing 'note' as a positional argument.
        # With fatal=False a failed download yields False instead of a tuple,
        # so only unpack the result when the request actually succeeded.
        res = self._download_json_handle(url_or_request, video_id, note, *args, **kwargs)
        return res[0] if res is not False else None

    def _download_json_handle(self, url_or_request, video_id, note, *args, **kwargs):
        """
        Overridden to insert Bearer token if present, while
        still relying on the default logic for everything else.
        """
        # Because 'headers' can be in kwargs, we need to merge them
        # carefully with our Authorization header if we have a token
        custom_headers = kwargs.setdefault('headers', {})
        if self._access_token:
            custom_headers['Authorization'] = f'Bearer {self._access_token}'
        # Call the parent method with 'note' as a positional argument
        return super()._download_json_handle(url_or_request, video_id, note, *args, **kwargs)
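
    # Note: routing every API request through the two overrides above keeps the
    # Authorization header applied in one place instead of at every call site.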

    _VALID_URL = r'https?://(?:dev\.|beta\.)?vanillo\.tv/(?:v|embed)/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://vanillo.tv/v/iaCi-oTmmGY',
        'info_dict': {
            'id': 'iaCi-oTmmGY',
            'title': 'Wawa',
            'description': '',
            'thumbnail': 'https://images.vanillo.tv/V6mYuajeHGsSSPRJKCdRAvvWgHFVGZ00g-ne3TZevss/h:300/aHR0cHM6Ly9pbWFnZXMuY2RuLnZhbmlsbG8udHYvdGh1bWJuYWlsL1RhUGE3TEJFTVBlS205elh2ZWdzLmF2aWY',
            'uploader_url': 'M7A',
            'upload_date': '20240309',  # YYYYMMDD format, server API provides 2024-03-09T07:56:35.636Z
            'duration': 5.71,
            'view_count': 205,
            'comment_count': 2,
            'like_count': 4,
            'dislike_count': 0,
            'average_rating': 4.2,
            'categories': ['film_and_animation'],
            'tags': ['Wawa', 'wawa', 'Wa Wa', 'wa wa', 'WaWa', 'wAwA', 'wA Wa'],
        },
    }, {
        'url': 'https://vanillo.tv/v/RhSueuQZiKF',
        'info_dict': {
            'id': 'RhSueuQZiKF',
            'title': "What's New on Vanillo - Fall Update",
            'description': '',
            'thumbnail': 'https://images.vanillo.tv/7Qfelvn1-4waFjX3rIc1FkfpB9jOJqqLlvieD5i3mlA/h:300/aHR0cHM6Ly9pbWFnZXMuY2RuLnZhbmlsbG8udHYvdGh1bWJuYWlsL3JsMmR5ajJFcnozMEphSUd0bTZyLmF2aWY',
            'uploader_url': 'Vanillo',
            'upload_date': '20231020',  # YYYYMMDD format, server API provides 2023-10-20T04:53:13.718Z
            'duration': 99.35,
            'view_count': 368,
            'comment_count': 2,
            'like_count': 20,
            'dislike_count': 0,
            'average_rating': 4.2,
            'categories': ['film_and_animation'],
            'tags': [
                'fall', 'update', 'fall update', 'autumn', 'autumn update', 'vanillo', 'new features', 'new',
                'features', 'exciting', 'language', 'switch', 'english', 'descriptive audio', 'descriptive',
                'audio', 'qualities', 'higher', 'process', 'processing', 'faster', 'fast', '2x', '4x',
                'twice', 'speed', 'speedy', 'quick', 'chapters',
            ],
        },
        'playlist_mincount': 1,
    }]
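
    # The test cases above can be exercised with yt-dlp's extractor test runner,
    # e.g. (invocation per yt-dlp's contributing docs; adjust the name as needed):
    #   python test/test_download.py TestDownload.test_Vanillo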

    def _get_replies(self, comment_id, limit=3):
        replies = []
        replies_url = f'https://api.vanillo.tv/v1/comments/{comment_id}/replies?limit={limit}&reviewing=false'
        try:
            replies_data = self._download_json(
                replies_url, comment_id, note=f'Downloading replies for comment {comment_id}', fatal=False,
            )
        except ExtractorError:
            return replies
        # With fatal=False a failed request yields no data instead of raising
        if not replies_data or replies_data.get('status') != 'success':
            return replies
        for reply in replies_data.get('data', {}).get('comments', []):
            transformed = {
                'id': reply.get('id'),
                'author': reply.get('profile', {}).get('username'),
                'author_id': reply.get('profile', {}).get('id'),
                'text': reply.get('text'),
                'timestamp': parse_iso8601(reply.get('createdAt')),
            }
            replies.append(transformed)
        return replies

    def _get_comments(self, video_id, limit=10):
        all_comments = []
        page_key = None
        # Loop to download all comments using pageKey
        while True:
            url = (
                f'https://api.vanillo.tv/v1/videos/{video_id}/comments'
                f'?limit={limit}&reviewing=false&filter=high_to_low_score'
            )
            if page_key:
                url += f'&pageKey={page_key}'
            try:
                comments_data = self._download_json(url, video_id, note='Downloading comments', fatal=False)
            except ExtractorError:
                break
            # With fatal=False a failed request yields no data instead of raising
            if not comments_data or comments_data.get('status') != 'success':
                break
            data = comments_data.get('data', {})
            comments = data.get('comments', [])
            if not comments:
                break
            # For each comment, download replies (if any)
            for comment in comments:
                transformed = {
                    'id': comment.get('id'),
                    'author': comment.get('profile', {}).get('username'),
                    'author_id': comment.get('profile', {}).get('id'),
                    'text': comment.get('text'),
                    'timestamp': parse_iso8601(comment.get('createdAt')),
                    'replies': self._get_replies(comment.get('id')),
                }
                all_comments.append(transformed)
            page_key = data.get('nextPageKey')
            if not page_key:
                break
        return all_comments
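
    # Pagination contract as used above: each successful response may carry a
    # 'nextPageKey', which is echoed back via &pageKey=... to fetch the next
    # page; when the key is absent, the last page has been reached.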

    def _real_extract(self, url):
        video_id = self._match_id(url)

        # 1) Retrieve video info (metadata)
        video_info_url = f'https://api.vanillo.tv/v1/videos/{video_id}?groups=uploader,profile.full'
        try:
            video_info = self._download_json(video_info_url, video_id, note='Downloading video info')
        except ExtractorError as e:
            # Try to get an HTTP code from the error cause or message
            # (yt-dlp's networking HTTPError exposes .status; urllib's uses .code)
            http_code = getattr(e.cause, 'status', None) or getattr(e.cause, 'code', None)
            if http_code is None and 'HTTP Error 404' in str(e):
                http_code = 404
            if http_code == 404:
                self.raise_login_required(
                    'Video not found or is private. '  # maybe use report_login here?
                    'Sign in if you have access to this video. Use the --cookies or --cookies-from-browser option',
                    method=None,
                )
            elif http_code == 403:
                raise ExtractorError(
                    'Access is likely blocked by your Internet provider. Try another ISP or use a VPN',
                    expected=True,
                )
            raise

        if video_info.get('status') != 'success':
            raise ExtractorError('Video info API returned an error', expected=True)
        data = video_info.get('data', {})
        title = data.get('title') or video_id
        description = data.get('description')
        thumbnail = data.get('thumbnail')

        uploader = data.get('uploader', {})
        uploader_url = uploader.get('url')

        # 2) Fix the ISO8601 date to remove leftover data
        upload_date_raw = data.get('publishedAt')
        upload_date = None
        if upload_date_raw:
            # Remove fractional seconds and any extra data after 'Z'
            upload_date_raw = re.sub(r'\.\d+', '', upload_date_raw)
            upload_date_raw = re.sub(r'Z.*$', 'Z', upload_date_raw)
            try:
                parsed_date = datetime.datetime.fromisoformat(upload_date_raw.replace('Z', '+00:00'))
                upload_date = parsed_date.strftime('%Y%m%d')
            except ValueError:
                pass
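        # e.g. the API's '2024-03-09T07:56:35.636Z' becomes upload_date '20240309'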

        duration = data.get('duration')

        # Convert numeric fields safely
        def safe_int(val):
            try:
                return int(val)
            except (TypeError, ValueError):
                return None

        view_count = safe_int(data.get('views'))
        comment_count = safe_int(data.get('totalComments'))
        like_count = safe_int(data.get('likes'))
        dislike_count = safe_int(data.get('dislikes'))

        average_rating = None
        if like_count is not None and dislike_count is not None:
            total = like_count + dislike_count
            if total > 0:
                average_rating = round((like_count / total) * 5, 1)
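                # i.e. the like ratio mapped onto a 0-5 scale,
                # e.g. 20 likes and 5 dislikes -> round(20 / 25 * 5, 1) == 4.0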

        categories = data.get('category')
        if categories and not isinstance(categories, list):
            categories = [categories]
        tags = data.get('tags')

        # 3) Get watch token (required for accessing manifests)
        watch_token_url = 'https://api.vanillo.tv/v1/watch'
        post_data = json.dumps({'videoId': video_id}).encode('utf-8')
        watch_token_resp = self._download_json(
            watch_token_url, video_id,
            note='Downloading watch token',
            data=post_data,
            headers={'Content-Type': 'application/json'},
        )
        watch_token = watch_token_resp.get('data', {}).get('watchToken')
        if not watch_token:
            raise ExtractorError('Failed to retrieve watch token', expected=True)

        # 4) Get the HLS & DASH manifest URLs using the watch token
        manifests_url = f'https://api.vanillo.tv/v1/watch/manifests?watchToken={watch_token}'
        manifests = self._download_json(manifests_url, video_id, note='Downloading manifests')
        hls_url = manifests.get('data', {}).get('media', {}).get('hls')
        # dash_url = manifests.get('data', {}).get('media', {}).get('dash')
        # DASH is disabled because it produces comically gigantic files:
        # e.g. a 1.7 MB file becomes 15.1 MB, so short videos balloon past 100 GB for no reason.
        # The same applies to audio tracks, so RAM usage would be high and the merged file even bigger.
        '''
        if dash_url:
            fmts, subs = self._extract_mpd_formats_and_subtitles(
                dash_url, video_id, mpd_id='dash', fatal=False)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)
        '''

        # 5) Extract available formats/subtitles from HLS
        subtitles = {}
        formats = []
        if hls_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                hls_url, video_id, ext='mp4', m3u8_id='hls', fatal=False,
            )
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        # 6) Download all comments using pagination with pageKey, only if --write-comments was given
        if self._downloader.params.get('getcomments'):
            comments = self._get_comments(video_id, limit=10)
        else:
            comments = None

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'formats': formats,
            'subtitles': subtitles,
            'comments': comments,
            'uploader_url': uploader_url,
            'upload_date': upload_date,
            'duration': duration,
            'view_count': view_count,
            'comment_count': comment_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'average_rating': average_rating,
            'categories': categories,
            'tags': tags,
        }


class VanilloPlaylistIE(InfoExtractor):
    _VALID_URL = r'https?://(?:dev\.|beta\.)?vanillo\.tv/playlist/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://vanillo.tv/playlist/wn9_PM-DTPypZeNy32EE1A',
        'info_dict': {
            'id': 'wn9_PM-DTPypZeNy32EE1A',
            'title': 'Staff Picks',
        },
        'playlist_mincount': 1,
    }]

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        # 1) Download playlist metadata
        playlist_api_url = f'https://api.vanillo.tv/v1/playlists/{playlist_id}'
        playlist_info = self._download_json(
            playlist_api_url, playlist_id, note='Downloading playlist metadata', fatal=False,
        ) or {}  # the request is non-fatal and may yield no data
        playlist_data = playlist_info.get('data', {}).get('playlist', {})
        playlist_title = playlist_data.get('name') or playlist_id
        playlist_description = playlist_data.get('description')
        video_count = playlist_data.get('videoCount') or 20

        # 2) Download the videos using the videoCount as the limit
        api_url = f'https://api.vanillo.tv/v1/playlists/{playlist_id}/videos?offset=0&limit={video_count}'
        videos_data = self._download_json(
            api_url, playlist_id, note='Downloading playlist videos',
        )
        videos = videos_data.get('data', {}).get('videos', [])
        entries = []
        for video in videos:
            vid = video.get('id')
            if not vid:
                continue
            video_url = f'https://vanillo.tv/v/{vid}'
            entries.append(self.url_result(video_url, VanilloIE.ie_key()))

        info = self.playlist_result(entries, playlist_id, playlist_title=playlist_title)
        if playlist_description:
            info['description'] = playlist_description
        return info


class VanilloUserIE(InfoExtractor):
    _VALID_URL = r'https?://(?:dev\.|beta\.)?vanillo\.tv/u/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://vanillo.tv/u/f9pKNFrUSG6Qo3pJ4UlGbQ',
        'info_dict': {
            'id': 'f9pKNFrUSG6Qo3pJ4UlGbQ',
            'title': 'User BakhosVillager videos',
        },
        'playlist_mincount': 1,
    }]

    def _real_extract(self, url):
        user_id = self._match_id(url)
        entries = []
        offset = 0
        # Loop to paginate through all user videos
        while True:
            api_url = (
                f'https://api.vanillo.tv/v1/profiles/{user_id}/videos'
                f'?offset={offset}&limit=20&groups=videos.all'
            )
            user_data = self._download_json(
                api_url, user_id, note='Downloading user videos', fatal=False,
            ) or {}  # the request is non-fatal and may yield no data
            videos = user_data.get('data', {}).get('videos', [])
            if not videos:
                break
            for video in videos:
                vid = video.get('id')
                if not vid:
                    continue
                video_url = f'https://vanillo.tv/v/{vid}'
                entries.append(self.url_result(video_url, VanilloIE.ie_key()))
            if len(videos) < 20:
                break
            offset += 20
        return self.playlist_result(entries, user_id, playlist_title=f'User {user_id} videos')
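

# Example invocations once the extractor is registered (URLs taken from the
# tests above):
#   yt-dlp "https://vanillo.tv/v/iaCi-oTmmGY"
#   yt-dlp "https://vanillo.tv/playlist/wn9_PM-DTPypZeNy32EE1A"
#   yt-dlp "https://vanillo.tv/u/f9pKNFrUSG6Qo3pJ4UlGbQ"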