1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-06-28 01:18:30 +00:00
This commit is contained in:
fries1234 2025-06-26 19:34:49 -07:00 committed by GitHub
commit 5d1513c483
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 76 additions and 7 deletions

View File

@ -2241,6 +2241,7 @@
from .tvplayer import TVPlayerIE from .tvplayer import TVPlayerIE
from .tvw import ( from .tvw import (
TvwIE, TvwIE,
TvwNewsIE,
TvwTvChannelsIE, TvwTvChannelsIE,
) )
from .tweakers import TweakersIE from .tweakers import TweakersIE

View File

@ -1,6 +1,6 @@
import json import json
from .common import InfoExtractor from .common import ExtractorError, HTTPError, InfoExtractor
from ..utils import ( from ..utils import (
clean_html, clean_html,
extract_attributes, extract_attributes,
@ -10,12 +10,31 @@
unified_timestamp, unified_timestamp,
url_or_none, url_or_none,
) )
from ..utils.traversal import find_element, traverse_obj from ..utils.traversal import find_element, find_elements, traverse_obj
class TvwIE(InfoExtractor): class TvwBaseIE(InfoExtractor):
def _download_tvw_webpage(self, url, video_id):
    """Download a tvw.org page, retrying with impersonation on HTTP 403.

    The first attempt sends a hardcoded Firefox User-Agent. If that request
    fails with an HTTP 403, a warning is reported and the page is requested
    once more with ``impersonate=True``. Any other failure is re-raised
    unchanged.
    """
    # yt-dlp's default user-agents are too old and blocked by cloudflare
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:137.0) Gecko/20100101 Firefox/137.0',
    }
    try:
        return self._download_webpage(url, video_id, headers=browser_headers)
    except ExtractorError as error:
        got_403 = isinstance(error.cause, HTTPError) and error.cause.status == 403
        if not got_403:
            raise
        self.report_warning('Got HTTP Error 403, retrying')
        # Retry with impersonation if hardcoded UA is insufficient to bypass cloudflare
        return self._download_webpage(url, video_id, impersonate=True)
class TvwIE(TvwBaseIE):
IE_NAME = 'tvw' IE_NAME = 'tvw'
_VALID_URL = r'https?://(?:www\.)?tvw\.org/video/(?P<id>[^/?#]+)' _VALID_URL = [
r'https?://(?:www\.)?tvw\.org/video/(?P<id>[^/?#]+)',
r'https?://(?:www\.)?tvw\.org/watch/?\?(?:[^#]+&)?eventID=(?P<id>\d+)',
]
_TESTS = [{ _TESTS = [{
'url': 'https://tvw.org/video/billy-frank-jr-statue-maquette-unveiling-ceremony-2024011211/', 'url': 'https://tvw.org/video/billy-frank-jr-statue-maquette-unveiling-ceremony-2024011211/',
'md5': '9ceb94fe2bb7fd726f74f16356825703', 'md5': '9ceb94fe2bb7fd726f74f16356825703',
@ -75,11 +94,25 @@ class TvwIE(InfoExtractor):
'display_id': 'washington-to-washington-a-new-space-race-2022041111', 'display_id': 'washington-to-washington-a-new-space-race-2022041111',
'categories': ['Washington to Washington', 'General Interest'], 'categories': ['Washington to Washington', 'General Interest'],
}, },
}, {
'url': 'https://tvw.org/watch?eventID=2025041235',
'md5': '7d697c02f110b37d6a47622ea608ca90',
'info_dict': {
'id': '2025041235',
'ext': 'mp4',
'title': 'Legislative Review -- April 18',
'thumbnail': r're:^https?://.*\.(?:jpe?g|png)$',
'description': 'Legislative Review features highlights from Friday\'s legislative activity (4/18/25).',
'timestamp': 1745006400,
'upload_date': '20250418',
'location': 'Hayner Media Center',
'categories': ['Legislative Review'],
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) webpage = self._download_tvw_webpage(url, display_id)
client_id = self._html_search_meta('clientID', webpage, fatal=True) client_id = self._html_search_meta('clientID', webpage, fatal=True)
video_id = self._html_search_meta('eventID', webpage, fatal=True) video_id = self._html_search_meta('eventID', webpage, fatal=True)
@ -125,7 +158,42 @@ def _real_extract(self, url):
} }
class TvwTvChannelsIE(InfoExtractor): class TvwNewsIE(TvwBaseIE):
IE_NAME = 'tvw:news'
_VALID_URL = r'https?://(?:www\.)?tvw\.org/\d{4}/\d{2}/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://tvw.org/2024/01/the-impact-issues-to-watch-in-the-2024-legislative-session/',
'info_dict': {
'id': 'the-impact-issues-to-watch-in-the-2024-legislative-session',
'title': 'The Impact - Issues to Watch in the 2024 Legislative Session',
'description': 'md5:65f0b33ec8f18ff1cd401c5547aa5441',
},
'playlist_count': 6,
}, {
'url': 'https://tvw.org/2024/06/the-impact-water-rights-and-the-skookumchuck-dam-debate/',
'info_dict': {
'id': 'the-impact-water-rights-and-the-skookumchuck-dam-debate',
'title': 'The Impact - Water Rights and the Skookumchuck Dam Debate',
'description': 'md5:185f3a2350ef81e3fa159ac3e040a94b',
},
'playlist_count': 1,
}]
def _real_extract(self, url):
    """Return a playlist of the videos embedded in a TVW news article.

    The article slug matched from the URL is used as the playlist ID. Every
    ``invintus-player`` element on the page contributes its ``data-eventid``
    attribute, which is turned into a ``tvw.org/watch?eventID=...`` URL and
    handed to ``TvwIE``.
    """
    playlist_id = self._match_id(url)
    webpage = self._download_tvw_webpage(url, playlist_id)

    # Collect the event ID carried by each embedded player element
    player_path = (
        {find_elements(cls='invintus-player', html=True)}, ..., {extract_attributes}, 'data-eventid')
    event_ids = traverse_obj(webpage, player_path)

    def watch_url(event_id):
        return f'https://tvw.org/watch?eventID={event_id}'

    # The OpenGraph title carries a site suffix that is stripped for the playlist title
    title = remove_end(self._og_search_title(webpage, default=None), ' - TVW')
    description = self._og_search_description(webpage, default=None)

    return self.playlist_from_matches(
        event_ids, playlist_id,
        playlist_title=title,
        playlist_description=description,
        getter=watch_url, ie=TvwIE)
class TvwTvChannelsIE(TvwBaseIE):
IE_NAME = 'tvw:tvchannels' IE_NAME = 'tvw:tvchannels'
_VALID_URL = r'https?://(?:www\.)?tvw\.org/tvchannels/(?P<id>[^/?#]+)' _VALID_URL = r'https?://(?:www\.)?tvw\.org/tvchannels/(?P<id>[^/?#]+)'
_TESTS = [{ _TESTS = [{
@ -150,7 +218,7 @@ class TvwTvChannelsIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_tvw_webpage(url, video_id)
m3u8_url = traverse_obj(webpage, ( m3u8_url = traverse_obj(webpage, (
{find_element(id='invintus-persistent-stream-frame', html=True)}, {extract_attributes}, {find_element(id='invintus-persistent-stream-frame', html=True)}, {extract_attributes},