diff --git a/yt_dlp/extractor/tviplayer.py b/yt_dlp/extractor/tviplayer.py index 17f8535099..b2ea3db277 100644 --- a/yt_dlp/extractor/tviplayer.py +++ b/yt_dlp/extractor/tviplayer.py @@ -1,78 +1,143 @@ +import json +import re + from .common import InfoExtractor -from ..utils import traverse_obj +from ..utils import ExtractorError, js_to_json, traverse_obj class TVIPlayerIE(InfoExtractor): - _VALID_URL = r'https?://tviplayer\.iol\.pt(/programa/[\w-]+/[a-f0-9]+)?/\w+/(?P<id>\w+)' + _VALID_URL = r'https?://tviplayer\.iol\.pt/(?:programa/[^/]+/[0-9a-f]+/(?:video|episodio)|video|episodio|[^/]+/[^/]+|[^/]+)/(?P<id>[0-9A-Za-z]+)(?:[/?#]|$)' _TESTS = [{ - 'url': 'https://tviplayer.iol.pt/programa/jornal-das-8/53c6b3903004dc006243d0cf/video/61c8e8b90cf2c7ea0f0f71a9', + 'url': 'https://tviplayer.iol.pt/programa/a-protegida/67a63479d34ef72ee441fa79/episodio/t1e120', 'info_dict': { - 'id': '61c8e8b90cf2c7ea0f0f71a9', + 'id': '689683000cf20ac1d5f35341', 'ext': 'mp4', - 'duration': 4167, - 'title': 'Jornal das 8 - 26 de dezembro de 2021', - 'thumbnail': 'https://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/61c8ee630cf2cc58e7d98d9f/', - 'season_number': 8, - 'season': 'Season 8', - }, - }, { - 'url': 'https://tviplayer.iol.pt/programa/isabel/62b471090cf26256cd2a8594/video/62be445f0cf2ea4f0a5218e5', - 'info_dict': { - 'id': '62be445f0cf2ea4f0a5218e5', - 'ext': 'mp4', - 'duration': 3255, - 'season': 'Season 1', - 'title': 'Isabel - Episódio 1', - 'thumbnail': 'https://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/62beac200cf2f9a86eab856b/', - 'season_number': 1, - }, - }, { - # no /programa/ - 'url': 'https://tviplayer.iol.pt/video/62c4131c0cf2f9a86eac06bb', - 'info_dict': { - 'id': '62c4131c0cf2f9a86eac06bb', - 'ext': 'mp4', - 'title': 'David e Mickael Carreira respondem: «Qual é o próximo a ser pai?»', - 'thumbnail': 'https://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/62c416490cf2ea367d4433fd/', - 'season': 'Season 2', - 'duration': 148, - 'season_number': 2, - }, - }, { 
- # episodio url - 'url': 'https://tviplayer.iol.pt/programa/para-sempre/61716c360cf2365a5ed894c4/episodio/t1e187', - 'info_dict': { - 'id': 't1e187', - 'ext': 'mp4', - 'season': 'Season 1', - 'title': 'Quem denunciou Pedro?', - 'thumbnail': 'https://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/62eda30b0cf2ea367d48973b/', - 'duration': 1250, + 'duration': 1593, + 'title': 'A Protegida - Clarice descobre o que une Óscar a Gonçalo e Mónica', + 'thumbnail': 'https://img.iol.pt/image/id/68971037d34ef72ee44941a6/', 'season_number': 1, }, }] def _real_initialize(self): - self.wms_auth_sign_token = self._download_webpage( - 'https://services.iol.pt/matrix?userId=', 'wmsAuthSign', - note='Trying to get wmsAuthSign token') + # try to obtain the wmsAuthSign token; if it fails, continue without it + try: + self.wms_auth_sign_token = self._download_webpage( + 'https://services.iol.pt/matrix?userId=', 'wmsAuthSign', + note='Downloading wmsAuthSign token') + except Exception: + self.wms_auth_sign_token = None + + def _extract_enclosing_js_object(self, webpage, keyword): + """ + Find a JS object (balanced braces) that contains keyword (e.g. "videoUrl"). + Returns the text of the object (including braces) or None. + """ + k = re.search(re.escape(keyword), webpage) + if not k: + return None + pos = k.start() + # find an opening brace before pos + start = webpage.rfind('{', 0, pos) + if start == -1: + return None + depth = 0 + for i in range(start, len(webpage)): + ch = webpage[i] + if ch == '{': + depth += 1 + elif ch == '}': + depth -= 1 + if depth == 0: + return webpage[start:i + 1] + return None def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage(url, video_id or 'tviplayer') - json_data = self._search_json( - r'