1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-06-27 17:08:32 +00:00
This commit is contained in:
LN Liberda 2025-06-17 18:13:35 +02:00 committed by GitHub
commit 4a1f41dd39
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 66 additions and 48 deletions

View File

@ -1849,7 +1849,7 @@
SCTEIE,
SCTECourseIE,
)
from .sejmpl import SejmIE
from .sejmpl import SejmBlueonlineIE, SejmIE
from .sen import SenIE
from .senalcolombia import SenalColombiaLiveIE
from .senategov import (

View File

@ -1,15 +1,15 @@
import datetime as dt
import urllib.parse
from .common import InfoExtractor
from .redge import RedCDNLivxIE
from ..utils import (
clean_html,
join_nonempty,
js_to_json,
strip_or_none,
try_get,
update_url_query,
)
from ..utils.traversal import traverse_obj
def is_dst(date):
@ -30,7 +30,7 @@ class SejmIE(InfoExtractor):
_VALID_URL = (
r'https?://(?:www\.)?sejm\.gov\.pl/[Ss]ejm(?P<term>\d+)\.nsf/transmisje(?:_arch)?\.xsp(?:\?[^#]*)?#(?P<id>[\dA-F]+)',
r'https?://(?:www\.)?sejm\.gov\.pl/[Ss]ejm(?P<term>\d+)\.nsf/transmisje(?:_arch)?\.xsp\?(?:[^#]+&)?unid=(?P<id>[\dA-F]+)',
r'https?://sejm-embed\.redcdn\.pl/[Ss]ejm(?P<term>\d+)\.nsf/VideoFrame\.xsp/(?P<id>[\dA-F]+)',
r'https?://(?:(?:www\.)?sejm\.gov\.pl|sejm-embed\.redcdn\.pl)/[Ss]ejm(?P<term>\d+)\.nsf/VideoFrame\.xsp/(?P<id>[\dA-F]+)',
)
IE_NAME = 'sejm'
@ -46,7 +46,7 @@ class SejmIE(InfoExtractor):
},
'playlist': [{
'info_dict': {
'id': 'ENC01-722340000000-722360145000',
'id': 'ENC01-6181EF1AD9CEEBB5C1258A6D006452B5-722340000000-722360145000',
'ext': 'mp4',
'duration': 20145,
'title': '1. posiedzenie Sejmu X kadencji - ENC01',
@ -54,7 +54,7 @@ class SejmIE(InfoExtractor):
},
}, {
'info_dict': {
'id': 'ENC30-722340000000-722360145000',
'id': 'ENC30-6181EF1AD9CEEBB5C1258A6D006452B5-722340000000-722360145000',
'ext': 'mp4',
'duration': 20145,
'title': '1. posiedzenie Sejmu X kadencji - ENC30',
@ -62,7 +62,7 @@ class SejmIE(InfoExtractor):
},
}, {
'info_dict': {
'id': 'ENC31-722340000000-722360145000',
'id': 'ENC31-6181EF1AD9CEEBB5C1258A6D006452B5-722340000000-722360145000',
'ext': 'mp4',
'duration': 20145,
'title': '1. posiedzenie Sejmu X kadencji - ENC31',
@ -70,7 +70,7 @@ class SejmIE(InfoExtractor):
},
}, {
'info_dict': {
'id': 'ENC32-722340000000-722360145000',
'id': 'ENC32-6181EF1AD9CEEBB5C1258A6D006452B5-722340000000-722360145000',
'ext': 'mp4',
'duration': 20145,
'title': '1. posiedzenie Sejmu X kadencji - ENC32',
@ -79,7 +79,7 @@ class SejmIE(InfoExtractor):
}, {
# sign lang interpreter
'info_dict': {
'id': 'Migacz-ENC01-1-722340000000-722360145000',
'id': 'Migacz-ENC01-6181EF1AD9CEEBB5C1258A6D006452B5-722340000000-722360145000',
'ext': 'mp4',
'duration': 20145,
'title': '1. posiedzenie Sejmu X kadencji - Migacz-ENC01',
@ -98,7 +98,7 @@ class SejmIE(InfoExtractor):
},
'playlist': [{
'info_dict': {
'id': 'ENC08-1-503831270000-503840040000',
'id': 'ENC08-9377A9D65518E9A5C125808E002E9FF2-503831270000-503840040000',
'ext': 'mp4',
'duration': 8770,
'title': 'Debata "Lepsza Polska: obywatelska" - ENC08',
@ -130,11 +130,15 @@ class SejmIE(InfoExtractor):
}]
def _real_extract(self, url):
# API (publicly documented) provides some metadata, and starting at 10th term, m3u8 URLs. Before then it's broken.
# Frame provides timeframe and cameras available (including SLI; except for 7th term, where it provides a URL),
# but is missing other necessary metadata (live_status, title).
# Transmisje_arch JSON provides useful metadata (only place with live_status!), but not URLs/cameras.
term, video_id = self._match_valid_url(url).group('term', 'id')
frame = self._download_webpage(
f'https://sejm-embed.redcdn.pl/Sejm{term}.nsf/VideoFrame.xsp/{video_id}',
f'https://www.sejm.gov.pl/Sejm{term}.nsf/VideoFrame.xsp/{video_id}',
video_id)
# despite the name "transmisje_arch", this works for live streams too!
player_config = self._search_json(r'var\splayerConfig\s*=\s*', frame, 'player config', video_id, transform_source=js_to_json)
data = self._download_json(
f'https://www.sejm.gov.pl/Sejm{term}.nsf/transmisje_arch.xsp/json/{video_id}',
video_id)
@ -162,49 +166,35 @@ def _real_extract(self, url):
entries = []
def add_entry(file, legacy_file=False):
if not file:
return
file = self._proto_relative_url(file)
if not legacy_file:
file = update_url_query(file, {'startTime': start_time})
if stop_time is not None:
file = update_url_query(file, {'stopTime': stop_time})
stream_id = self._search_regex(r'/o2/sejm/([^/]+)/[^./]+\.livx', file, 'stream id')
common_info = {
'url': file,
'duration': duration,
}
if legacy_file:
def add_entry(camera):
if player_config.get('isMP4'):
# Special case in 7th term. Instead of a camera name, this is a URL to a simple MP4 file.
entries.append({
**common_info,
'url': self._proto_relative_url(camera),
'id': video_id,
'title': title,
'duration': duration,
'live_status': live_status,
})
else:
entries.append({
**common_info,
'_type': 'url_transparent',
'ie_key': RedCDNLivxIE.ie_key(),
'id': stream_id,
'title': join_nonempty(title, stream_id, delim=' - '),
})
return
url = f'https://sejm.c.blueonline.tv/stream/{camera}/{video_id}/manifest.mpd?start={start_time}'
if stop_time is not None:
url = update_url_query(url, {'stop': stop_time})
entries.append({
'_type': 'url_transparent',
'url': url,
'ie_key': SejmBlueonlineIE.ie_key(),
'id': camera,
'duration': duration,
'title': join_nonempty(title, camera, delim=' - '),
'live_status': live_status,
})
cameras = self._search_json(
r'var\s+cameras\s*=', frame, 'camera list', video_id,
contains_pattern=r'\[(?s:.+)\]', transform_source=js_to_json,
fatal=False) or []
for camera_file in traverse_obj(cameras, (..., 'file', {dict})):
if camera_file.get('flv'):
add_entry(camera_file['flv'])
elif camera_file.get('mp4'):
# this is only a thing in 7th term. no streams before, and starting 8th it's redcdn livx
add_entry(camera_file['mp4'], legacy_file=True)
else:
self.report_warning('Unknown camera stream type found')
for camera in player_config['cameras']:
add_entry(camera)
if params.get('mig'):
add_entry(self._search_regex(r"var sliUrl\s*=\s*'([^']+)'", frame, 'sign language interpreter url', fatal=False))
add_entry(player_config['sli'])
return {
'_type': 'playlist',
@ -216,3 +206,31 @@ def add_entry(file, legacy_file=False):
'live_status': live_status,
'location': strip_or_none(data.get('location')),
}
class SejmBlueonlineIE(InfoExtractor):
    """Extract one camera stream of a Sejm transmission from sejm.c.blueonline.tv.

    SejmIE emits ``url_transparent`` playlist entries that point at the
    ``manifest.mpd`` URL handled here. The URL path carries the camera name
    and the transmission id; the query string carries ``start`` and,
    optionally, ``stop``.
    """

    IE_DESC = False
    # The dots in the manifest file names are escaped so they only match a
    # literal "." and not an arbitrary character.
    _VALID_URL = r'https?://sejm\.c\.blueonline\.tv//?stream/(?P<camera>[\dA-Za-z-]+)/(?P<id>[\dA-F]+)/(?:playlist\.m3u8|manifest\.mpd)\?'
    _TESTS = [{
        'url': 'https://sejm.c.blueonline.tv/stream/Migacz-ENC01/6181EF1AD9CEEBB5C1258A6D006452B5/manifest.mpd?start=722340000000&stop=722360145000',
        'info_dict': {
            'id': 'Migacz-ENC01-6181EF1AD9CEEBB5C1258A6D006452B5-722340000000-722360145000',
            'ext': 'mp4',
            'title': '_',
        },
    }]

    def _real_extract(self, url):
        """Collect HLS and DASH formats for the camera stream at *url*.

        The query string of *url* is forwarded unchanged to both the
        ``playlist.m3u8`` and the ``manifest.mpd`` endpoint of the same
        camera/transmission pair.

        Returns an info dict with ``id``, ``title`` and ``formats``.
        """
        camera, video_id = self._match_valid_url(url).group('camera', 'id')

        # Keep the raw query string so it can be appended verbatim to both
        # manifest URLs below.
        qs = urllib.parse.urlparse(url).query
        query = urllib.parse.parse_qs(qs)
        # Either parameter may be missing from the URL; try_get yields None then.
        start_time = try_get(query, lambda q: q['start'][0])
        stop_time = try_get(query, lambda q: q['stop'][0])

        stream_base = f'https://sejm.c.blueonline.tv/stream/{camera}/{video_id}'
        formats = []
        # Without a stop time the HLS playlist is handled as a live stream.
        formats.extend(self._extract_m3u8_formats(
            f'{stream_base}/playlist.m3u8?{qs}', video_id, live=stop_time is None))
        formats.extend(self._extract_mpd_formats(
            f'{stream_base}/manifest.mpd?{qs}', video_id))

        return {
            # Missing parts (e.g. no stop time) are left out of the id.
            'id': join_nonempty(camera, video_id, start_time, stop_time),
            # Placeholder; SejmIE's url_transparent entries carry their own title.
            'title': '_',
            'formats': formats,
        }