From 0adeb1e54b2d7e95cd19999e71013877850f8f41 Mon Sep 17 00:00:00 2001 From: ischmidt20 Date: Thu, 24 Jul 2025 18:35:48 -0400 Subject: [PATCH] [ie/tbs] Fix truTV support (#9683) Closes #3400 Authored by: ischmidt20, bashonly Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/_extractors.py | 1 - yt_dlp/extractor/tbs.py | 113 ++++++++++++++++++++++++++------ yt_dlp/extractor/trutv.py | 71 -------------------- yt_dlp/extractor/turner.py | 5 ++ 4 files changed, 97 insertions(+), 93 deletions(-) delete mode 100644 yt_dlp/extractor/trutv.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 59a61e060..1aa2927f8 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2166,7 +2166,6 @@ from .trueid import TrueIDIE from .trunews import TruNewsIE from .truth import TruthIE -from .trutv import TruTVIE from .tube8 import Tube8IE from .tubetugraz import ( TubeTuGrazIE, diff --git a/yt_dlp/extractor/tbs.py b/yt_dlp/extractor/tbs.py index 80534731e..f8891671f 100644 --- a/yt_dlp/extractor/tbs.py +++ b/yt_dlp/extractor/tbs.py @@ -5,45 +5,110 @@ from ..utils import ( float_or_none, int_or_none, + make_archive_id, strip_or_none, ) +from ..utils.traversal import traverse_obj class TBSIE(TurnerBaseIE): - _VALID_URL = r'https?://(?:www\.)?(?Ptbs|tntdrama)\.com(?P/(?:movies|watchtnt|watchtbs|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P[^/?#]+))' + _SITE_INFO = { + 'tbs': ('TBS', 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJkZTA0NTYxZS1iMTFhLTRlYTgtYTg5NC01NjI3MGM1NmM2MWIiLCJuYmYiOjE1MzcxODkzOTAsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTg5MzkwfQ.Z7ny66kaqNDdCHf9Y9KsV12LrBxrLkGGxlYe2XGm6qsw2T-k1OCKC1TMzeqiZP735292MMRAQkcJDKrMIzNbAuf9nCdIcv4kE1E2nqUnjPMBduC1bHffZp8zlllyrN2ElDwM8Vhwv_5nElLRwWGEt0Kaq6KJAMZA__WDxKWC18T-wVtsOZWXQpDqO7nByhfj2t-Z8c3TUNVsA_wHgNXlkzJCZ16F2b7yGLT5ZhLPupOScd3MXC5iPh19HSVIok22h8_F_noTmGzmMnIRQi6bWYWK2zC7TQ_MsYHfv7V6EaG5m1RKZTV6JAwwoJQF_9ByzarLV1DGwZxD9-eQdqswvg'), + 'tntdrama': ('TNT', 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIwOTMxYTU4OS1jZjEzLTRmNjMtYTJmYy03MzhjMjE1NWU5NjEiLCJuYmYiOjE1MzcxOTA4MjcsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwODI3fQ.AucKvtws7oekTXi80_zX4-BlgJD9GLvlOI9FlBCjdlx7Pa3eJ0AqbogynKMiatMbnLOTMHGjd7tTiq422unmZjBz70dhePAe9BbW0dIo7oQ57vZ-VBYw_tWYRPmON61MwAbLVlqROD3n_zURs85S8TlkQx9aNx9x_riGGELjd8l05CVa_pOluNhYvuIFn6wmrASOKI1hNEblBDWh468UWP571-fe4zzi0rlYeeHd-cjvtWvOB3bQsWrUVbK4pRmqvzEH59j0vNF-ihJF9HncmUicYONe47Mib3elfMok23v4dB1_UAlQY_oawfNcynmEnJQCcqFmbHdEwTW6gMiYsA'), + 'trutv': ('truTV', 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhYzQyOTkwMi0xMDYzLTQyNTQtYWJlYS1iZTY2ODM4MTVmZGIiLCJuYmYiOjE1MzcxOTA4NjgsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwODY4fQ.ewXl5LDMDvvx3nDXV4jCdSwUq_sOluKoOVsIjznAo6Zo4zrGe9rjlZ9DOmQKW66g6VRMexJsJ5vM1EkY8TC5-YcQw_BclK1FPGO1rH3Wf7tX_l0b1BVbSJQKIj9UgqDp_QbGcBXz24kN4So3U22mhs6di9PYyyfG68ccKL2iRprcVKWCslIHwUF-T7FaEqb0K57auilxeW1PONG2m-lIAcZ62DUwqXDWvw0CRoWI08aVVqkkhnXaSsQfLs5Ph1Pfh9Oq3g_epUm9Ss45mq6XM7gbOb5omTcKLADRKK-PJVB_JXnZnlsXbG0ttKE1cTKJ738qu7j4aipYTf-W0nKF5Q'), + } + _VALID_URL = fr'''(?x) + https?://(?:www\.)?(?P{"|".join(map(re.escape, _SITE_INFO))})\.com + (?P/(?: + (?Pwatch(?:tnt|tbs|trutv))| + movies|shows/[^/?#]+/(?:clips|season-\d+/episode-\d+) + )/(?P[^/?#]+)) + ''' _TESTS = [{ - 'url': 'http://www.tntdrama.com/shows/the-alienist/clips/monster', + 'url': 'https://www.tbs.com/shows/american-dad/season-6/episode-12/you-debt-your-life', 'info_dict': { - 'id': '8d384cde33b89f3a43ce5329de42903ed5099887', + 'id': '984bdcd8db0cc00dc699927f2a411c8c6e0e48f3', 'ext': 'mp4', - 'title': 'Monster', - 'description': 'Get a first look at the theatrical trailer for TNT’s highly anticipated new psychological thriller The Alienist, which premieres January 22 on TNT.', - 'timestamp': 1508175329, - 'upload_date': '20171016', + 'title': 'You Debt Your Life', + 'description': 'md5:f211cfeb9187fd3cdb53eb0e8930d499', + 'duration': 1231.0, + 'thumbnail': r're:https://images\.tbs\.com/tbs/.+\.(?:jpe?g|png)', + 'chapters': 'count:4', + 'season': 'Season 6', + 'season_number': 6, + 'episode': 'Episode 12', + 'episode_number': 12, + 'timestamp': 1478276239, + 'upload_date': '20161104', }, - 'params': { - # m3u8 download - 'skip_download': True, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.tntdrama.com/shows/the-librarians-the-next-chapter/season-1/episode-10/and-going-medieval', + 'info_dict': { + 'id': 'e487b31b663a8001864f62fd20907782f7b8ccb8', + 'ext': 'mp4', + 'title': 'And Going Medieval', + 'description': 'md5:5aed0ae23a6cf148a02fe3c1be8359fa', + 'duration': 2528.0, + 'thumbnail': r're:https://images\.tntdrama\.com/tnt/.+\.(?:jpe?g|png)', + 'chapters': 'count:7', + 'season': 'Season 1', + 'season_number': 1, + 'episode': 'Episode 10', + 'episode_number': 10, + 'timestamp': 1743107520, + 'upload_date': '20250327', }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.trutv.com/shows/the-carbonaro-effect/season-1/episode-1/got-the-bug-out', + 'info_dict': { + 'id': 'b457dd7458fd9e64b596355950b13a1ca799dc39', + 'ext': 'mp4', + 'title': 'Got the Bug Out', + 'description': 'md5:9eeddf6248f73517b0e5969b8a43c025', + 'duration': 1283.0, + 'thumbnail': r're:https://images\.trutv\.com/tru/.+\.(?:jpe?g|png)', + 'chapters': 'count:4', + 'season': 'Season 1', + 'season_number': 1, + 'episode': 'Episode 1', + 'episode_number': 1, + 'timestamp': 1570040829, + 'upload_date': '20191002', + '_old_archive_ids': ['trutv b457dd7458fd9e64b596355950b13a1ca799dc39'], + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'http://www.tntdrama.com/shows/the-alienist/clips/monster', + 'only_matching': True, }, { 'url': 'http://www.tbs.com/shows/search-party/season-1/episode-1/explicit-the-mysterious-disappearance-of-the-girl-no-one-knew', 'only_matching': True, }, { 'url': 'http://www.tntdrama.com/movies/star-wars-a-new-hope', 'only_matching': True, + }, { + 'url': 'https://www.trutv.com/shows/impractical-jokers/season-9/episode-1/you-dirty-dog', + 'only_matching': True, + }, { + 'url': 'https://www.trutv.com/watchtrutv/east', + 'only_matching': True, + }, { + 'url': 'https://www.tbs.com/watchtbs/east', + 'only_matching': True, + }, { + 'url': 'https://www.tntdrama.com/watchtnt/east', + 'only_matching': True, }] - _SOFTWARE_STATEMENT_MAP = { - 'tbs': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJkZTA0NTYxZS1iMTFhLTRlYTgtYTg5NC01NjI3MGM1NmM2MWIiLCJuYmYiOjE1MzcxODkzOTAsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTg5MzkwfQ.Z7ny66kaqNDdCHf9Y9KsV12LrBxrLkGGxlYe2XGm6qsw2T-k1OCKC1TMzeqiZP735292MMRAQkcJDKrMIzNbAuf9nCdIcv4kE1E2nqUnjPMBduC1bHffZp8zlllyrN2ElDwM8Vhwv_5nElLRwWGEt0Kaq6KJAMZA__WDxKWC18T-wVtsOZWXQpDqO7nByhfj2t-Z8c3TUNVsA_wHgNXlkzJCZ16F2b7yGLT5ZhLPupOScd3MXC5iPh19HSVIok22h8_F_noTmGzmMnIRQi6bWYWK2zC7TQ_MsYHfv7V6EaG5m1RKZTV6JAwwoJQF_9ByzarLV1DGwZxD9-eQdqswvg', - 'tntdrama': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIwOTMxYTU4OS1jZjEzLTRmNjMtYTJmYy03MzhjMjE1NWU5NjEiLCJuYmYiOjE1MzcxOTA4MjcsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwODI3fQ.AucKvtws7oekTXi80_zX4-BlgJD9GLvlOI9FlBCjdlx7Pa3eJ0AqbogynKMiatMbnLOTMHGjd7tTiq422unmZjBz70dhePAe9BbW0dIo7oQ57vZ-VBYw_tWYRPmON61MwAbLVlqROD3n_zURs85S8TlkQx9aNx9x_riGGELjd8l05CVa_pOluNhYvuIFn6wmrASOKI1hNEblBDWh468UWP571-fe4zzi0rlYeeHd-cjvtWvOB3bQsWrUVbK4pRmqvzEH59j0vNF-ihJF9HncmUicYONe47Mib3elfMok23v4dB1_UAlQY_oawfNcynmEnJQCcqFmbHdEwTW6gMiYsA', - } def _real_extract(self, url): - site, path, display_id = self._match_valid_url(url).groups() + site, path, display_id, watch = self._match_valid_url(url).group('site', 'path', 'id', 'watch') + is_live = bool(watch) webpage = self._download_webpage(url, display_id) - drupal_settings = self._parse_json(self._search_regex( - r']+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})', - webpage, 'drupal setting'), display_id) - is_live = 'watchtnt' in path or 'watchtbs' in path + drupal_settings = self._search_json( + r']+\bdata-drupal-selector="drupal-settings-json"[^>]*>', + webpage, 'drupal settings', display_id) video_data = next(v for v in drupal_settings['turner_playlist'] if is_live or v.get('url') == path) media_id = video_data['mediaID'] @@ -51,10 +116,14 @@ def _real_extract(self, url): tokenizer_query = urllib.parse.parse_qs(urllib.parse.urlparse( drupal_settings['ngtv_token_url']).query) + auth_info = traverse_obj(drupal_settings, ('top2', {dict})) or {} + site_name = auth_info.get('siteName') or self._SITE_INFO[site][0] + software_statement = auth_info.get('softwareStatement') or self._SITE_INFO[site][1] + info = self._extract_ngtv_info( - media_id, tokenizer_query, self._SOFTWARE_STATEMENT_MAP[site], { + media_id, tokenizer_query, software_statement, { 'url': url, - 'site_name': site[:3].upper(), + 'site_name': site_name, 'auth_required': video_data.get('authRequired') == '1' or is_live, 'is_live': is_live, }) @@ -87,4 +156,6 @@ def _real_extract(self, url): 'thumbnails': thumbnails, 'is_live': is_live, }) + if site == 'trutv': + info['_old_archive_ids'] = [make_archive_id(site, media_id)] return info diff --git a/yt_dlp/extractor/trutv.py b/yt_dlp/extractor/trutv.py deleted file mode 100644 index c1d0cb0d1..000000000 --- a/yt_dlp/extractor/trutv.py +++ /dev/null @@ -1,71 +0,0 @@ -from .turner import TurnerBaseIE -from ..utils import ( - int_or_none, - parse_iso8601, -) - - -class TruTVIE(TurnerBaseIE): - _VALID_URL = r'https?://(?:www\.)?trutv\.com/(?:shows|full-episodes)/(?P[0-9A-Za-z-]+)/(?:videos/(?P[0-9A-Za-z-]+)|(?P\d+))' - _TEST = { - 'url': 'https://www.trutv.com/shows/the-carbonaro-effect/videos/sunlight-activated-flower.html', - 'info_dict': { - 'id': 'f16c03beec1e84cd7d1a51f11d8fcc29124cc7f1', - 'ext': 'mp4', - 'title': 'Sunlight-Activated Flower', - 'description': "A customer is stunned when he sees Michael's sunlight-activated flower.", - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - } - _SOFTWARE_STATEMENT = 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhYzQyOTkwMi0xMDYzLTQyNTQtYWJlYS1iZTY2ODM4MTVmZGIiLCJuYmYiOjE1MzcxOTA4NjgsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwODY4fQ.ewXl5LDMDvvx3nDXV4jCdSwUq_sOluKoOVsIjznAo6Zo4zrGe9rjlZ9DOmQKW66g6VRMexJsJ5vM1EkY8TC5-YcQw_BclK1FPGO1rH3Wf7tX_l0b1BVbSJQKIj9UgqDp_QbGcBXz24kN4So3U22mhs6di9PYyyfG68ccKL2iRprcVKWCslIHwUF-T7FaEqb0K57auilxeW1PONG2m-lIAcZ62DUwqXDWvw0CRoWI08aVVqkkhnXaSsQfLs5Ph1Pfh9Oq3g_epUm9Ss45mq6XM7gbOb5omTcKLADRKK-PJVB_JXnZnlsXbG0ttKE1cTKJ738qu7j4aipYTf-W0nKF5Q' - - def _real_extract(self, url): - series_slug, clip_slug, video_id = self._match_valid_url(url).groups() - - if video_id: - path = 'episode' - display_id = video_id - else: - path = 'series/clip' - display_id = clip_slug - - data = self._download_json( - f'https://api.trutv.com/v2/web/{path}/{series_slug}/{display_id}', - display_id) - video_data = data['episode'] if video_id else data['info'] - media_id = video_data['mediaId'] - title = video_data['title'].strip() - - info = self._extract_ngtv_info( - media_id, {}, self._SOFTWARE_STATEMENT, { - 'url': url, - 'site_name': 'truTV', - 'auth_required': video_data.get('isAuthRequired'), - }) - - thumbnails = [] - for image in video_data.get('images', []): - image_url = image.get('srcUrl') - if not image_url: - continue - thumbnails.append({ - 'url': image_url, - 'width': int_or_none(image.get('width')), - 'height': int_or_none(image.get('height')), - }) - - info.update({ - 'id': media_id, - 'display_id': display_id, - 'title': title, - 'description': video_data.get('description'), - 'thumbnails': thumbnails, - 'timestamp': parse_iso8601(video_data.get('publicationDate')), - 'series': video_data.get('showTitle'), - 'season_number': int_or_none(video_data.get('seasonNum')), - 'episode_number': int_or_none(video_data.get('episodeNum')), - }) - return info diff --git a/yt_dlp/extractor/turner.py b/yt_dlp/extractor/turner.py index 4493705e9..a1a7fd690 100644 --- a/yt_dlp/extractor/turner.py +++ b/yt_dlp/extractor/turner.py @@ -251,6 +251,11 @@ def _extract_ngtv_info(self, media_id, tokenizer_query, software_statement, ap_d 'end_time': start_time + chapter_duration, }) + if is_live: + for f in formats: + # Prevent ffmpeg from adding its own http headers or else we get HTTP Error 403 + f['downloader_options'] = {'ffmpeg_args': ['-seekable', '0', '-icy', '0']} + return { 'formats': formats, 'chapters': chapters,