mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-08-13 16:08:29 +00:00
Merge 9316f5c642
into f2919bd28e
This commit is contained in:
commit
8758e88c85
@ -1,78 +1,143 @@
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import traverse_obj
|
||||
from ..utils import ExtractorError, js_to_json, traverse_obj
|
||||
|
||||
|
||||
class TVIPlayerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://tviplayer\.iol\.pt(/programa/[\w-]+/[a-f0-9]+)?/\w+/(?P<id>\w+)'
|
||||
_VALID_URL = r'https?://tviplayer\.iol\.pt/(?:programa/[^/]+/[0-9a-f]+/(?:video|episodio)|video|episodio|[^/]+/[^/]+|[^/]+)/(?P<id>[0-9A-Za-z]+)(?:[/?#]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://tviplayer.iol.pt/programa/jornal-das-8/53c6b3903004dc006243d0cf/video/61c8e8b90cf2c7ea0f0f71a9',
|
||||
'url': 'https://tviplayer.iol.pt/programa/a-protegida/67a63479d34ef72ee441fa79/episodio/t1e120',
|
||||
'info_dict': {
|
||||
'id': '61c8e8b90cf2c7ea0f0f71a9',
|
||||
'id': '689683000cf20ac1d5f35341',
|
||||
'ext': 'mp4',
|
||||
'duration': 4167,
|
||||
'title': 'Jornal das 8 - 26 de dezembro de 2021',
|
||||
'thumbnail': 'https://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/61c8ee630cf2cc58e7d98d9f/',
|
||||
'season_number': 8,
|
||||
'season': 'Season 8',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://tviplayer.iol.pt/programa/isabel/62b471090cf26256cd2a8594/video/62be445f0cf2ea4f0a5218e5',
|
||||
'info_dict': {
|
||||
'id': '62be445f0cf2ea4f0a5218e5',
|
||||
'ext': 'mp4',
|
||||
'duration': 3255,
|
||||
'season': 'Season 1',
|
||||
'title': 'Isabel - Episódio 1',
|
||||
'thumbnail': 'https://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/62beac200cf2f9a86eab856b/',
|
||||
'season_number': 1,
|
||||
},
|
||||
}, {
|
||||
# no /programa/
|
||||
'url': 'https://tviplayer.iol.pt/video/62c4131c0cf2f9a86eac06bb',
|
||||
'info_dict': {
|
||||
'id': '62c4131c0cf2f9a86eac06bb',
|
||||
'ext': 'mp4',
|
||||
'title': 'David e Mickael Carreira respondem: «Qual é o próximo a ser pai?»',
|
||||
'thumbnail': 'https://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/62c416490cf2ea367d4433fd/',
|
||||
'season': 'Season 2',
|
||||
'duration': 148,
|
||||
'season_number': 2,
|
||||
},
|
||||
}, {
|
||||
# episodio url
|
||||
'url': 'https://tviplayer.iol.pt/programa/para-sempre/61716c360cf2365a5ed894c4/episodio/t1e187',
|
||||
'info_dict': {
|
||||
'id': 't1e187',
|
||||
'ext': 'mp4',
|
||||
'season': 'Season 1',
|
||||
'title': 'Quem denunciou Pedro?',
|
||||
'thumbnail': 'https://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/62eda30b0cf2ea367d48973b/',
|
||||
'duration': 1250,
|
||||
'duration': 1593,
|
||||
'title': 'A Protegida - Clarice descobre o que une Óscar a Gonçalo e Mónica',
|
||||
'thumbnail': 'https://img.iol.pt/image/id/68971037d34ef72ee44941a6/',
|
||||
'season_number': 1,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
    """Fetch the wmsAuthSign token that is appended to stream URLs.

    The download is best-effort: when it fails, ``wms_auth_sign_token`` is
    set to ``None`` and extraction proceeds with unsigned URLs.
    """
    # A single, non-fatal request: with fatal=False the downloader reports a
    # warning and returns False instead of raising, so no try/except around a
    # broad Exception is needed. `or None` normalises False/'' to None.
    self.wms_auth_sign_token = self._download_webpage(
        'https://services.iol.pt/matrix?userId=', 'wmsAuthSign',
        note='Downloading wmsAuthSign token',
        errnote='Unable to download wmsAuthSign token', fatal=False) or None
|
||||
|
||||
def _extract_enclosing_js_object(self, webpage, keyword):
|
||||
"""
|
||||
Find a JS object (balanced braces) that contains keyword (e.g. "videoUrl").
|
||||
Returns the text of the object (including braces) or None.
|
||||
"""
|
||||
k = re.search(re.escape(keyword), webpage)
|
||||
if not k:
|
||||
return None
|
||||
pos = k.start()
|
||||
# find an opening brace before pos
|
||||
start = webpage.rfind('{', 0, pos)
|
||||
if start == -1:
|
||||
return None
|
||||
depth = 0
|
||||
for i in range(start, len(webpage)):
|
||||
ch = webpage[i]
|
||||
if ch == '{':
|
||||
depth += 1
|
||||
elif ch == '}':
|
||||
depth -= 1
|
||||
if depth == 0:
|
||||
return webpage[start:i + 1]
|
||||
return None
|
||||
|
||||
def _real_extract(self, url):
    """Extract formats and metadata for a single TVI Player page.

    The page embeds the player data as a JS/JSON object; several page
    layouts are probed in order and the first object that carries a stream
    URL is used.

    @param url  page URL matching ``_VALID_URL``
    @returns    info dict with id, title, thumbnail, duration, formats,
                subtitles and season_number
    @raises ExtractorError  when no object with a stream URL is found
    """
    video_id = self._match_id(url)
    webpage = self._download_webpage(url, video_id)

    # Accept the first candidate that actually has a stream URL, so an
    # object without one (or a non-dict value) cannot mask later fallbacks
    for video_info in self._video_info_candidates(webpage, video_id):
        if not isinstance(video_info, dict):
            continue
        video_url = video_info.get('videoUrl') or video_info.get('url') or video_info.get('video_url')
        if video_url and isinstance(video_url, str):
            break
    else:
        raise ExtractorError('Unable to locate video data in webpage', expected=True)

    # Sign the stream URL when a token was obtained during initialization
    if self.wms_auth_sign_token:
        separator = '&' if '?' in video_url else '?'
        video_url = f'{video_url}{separator}wmsAuthSign={self.wms_auth_sign_token}'

    # The page may expose an id that differs from the one in the URL
    # (e.g. an episode slug in the URL vs. an id in the page data)
    display_id = video_info.get('id') or video_id
    formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, display_id, ext='mp4')

    # Duration may be an int, a float or a numeric string; anything else is dropped
    try:
        duration = int(float(video_info.get('duration')))
    except (TypeError, ValueError, OverflowError):
        duration = None

    return {
        'id': display_id,
        'title': video_info.get('title') or self._og_search_title(webpage),
        'thumbnail': (video_info.get('cover') or video_info.get('thumbnail')
                      or self._og_search_thumbnail(webpage)),
        'duration': duration,
        'formats': formats,
        'subtitles': subtitles,
        'season_number': traverse_obj(video_info, ('program', 'seasonNum')),
    }

def _video_info_candidates(self, webpage, video_id):
    """Lazily yield objects that may describe the video, most specific source first."""

    def unwrap(data):
        # Page data is either the video object itself or wraps it as
        # {"video": [{...}]} / {"video": {...}}
        if isinstance(data, dict):
            yield traverse_obj(data, ('video', 0))
            yield data.get('video')
            yield data

    # 1) Literal `const opts = {...}` player options
    yield from unwrap(self._search_json(
        r'const\s+opts\s*=', webpage, 'player options', video_id,
        transform_source=js_to_json, default=None))

    # 2) Whichever JS object literal encloses the first "videoUrl"
    obj_text = self._extract_enclosing_js_object(webpage, 'videoUrl')
    if obj_text:
        yield from unwrap(self._parse_js_object(obj_text, video_id))

    # 3) Legacy layout: `jsonData = {...}`
    yield from unwrap(self._search_json(
        r'jsonData\s*=', webpage, 'json data', video_id, default=None))

    # 4) Last resort: a bare "videoUrl" key anywhere in the page
    mobj = re.search(r'["\']videoUrl["\']\s*:\s*["\'](https?://[^"\']+)["\']', webpage)
    if mobj:
        yield {'id': video_id, 'videoUrl': mobj.group(1)}

def _parse_js_object(self, obj_text, video_id):
    """Parse a JS object literal into Python data; return None when it cannot be parsed."""
    try:
        return self._parse_json(obj_text, video_id, transform_source=js_to_json)
    except ExtractorError:
        pass
    # Second chance with a cruder cleanup: drop trailing commas and
    # convert single quotes to double quotes before strict JSON parsing
    try:
        return json.loads(re.sub(r',\s*([}\]])', r'\1', obj_text).replace("'", '"'))
    except ValueError:
        return None
|
||||
|
Loading…
Reference in New Issue
Block a user