mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-06-27 17:08:32 +00:00
[ie/Sejm] Fix after provider switch (#9669)
This commit is contained in:
parent
4e7c1ea346
commit
bf05fb45fb
@ -1849,7 +1849,7 @@
|
||||
SCTEIE,
|
||||
SCTECourseIE,
|
||||
)
|
||||
from .sejmpl import SejmIE
|
||||
from .sejmpl import SejmBlueonlineIE, SejmIE
|
||||
from .sen import SenIE
|
||||
from .senalcolombia import SenalColombiaLiveIE
|
||||
from .senategov import (
|
||||
|
@ -1,15 +1,15 @@
|
||||
import datetime as dt
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .redge import RedCDNLivxIE
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
update_url_query,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
def is_dst(date):
|
||||
@ -30,7 +30,7 @@ class SejmIE(InfoExtractor):
|
||||
_VALID_URL = (
|
||||
r'https?://(?:www\.)?sejm\.gov\.pl/[Ss]ejm(?P<term>\d+)\.nsf/transmisje(?:_arch)?\.xsp(?:\?[^#]*)?#(?P<id>[\dA-F]+)',
|
||||
r'https?://(?:www\.)?sejm\.gov\.pl/[Ss]ejm(?P<term>\d+)\.nsf/transmisje(?:_arch)?\.xsp\?(?:[^#]+&)?unid=(?P<id>[\dA-F]+)',
|
||||
r'https?://sejm-embed\.redcdn\.pl/[Ss]ejm(?P<term>\d+)\.nsf/VideoFrame\.xsp/(?P<id>[\dA-F]+)',
|
||||
r'https?://(?:(?:www\.)?sejm\.gov\.pl|sejm-embed\.redcdn\.pl)/[Ss]ejm(?P<term>\d+)\.nsf/VideoFrame\.xsp/(?P<id>[\dA-F]+)',
|
||||
)
|
||||
IE_NAME = 'sejm'
|
||||
|
||||
@ -46,7 +46,7 @@ class SejmIE(InfoExtractor):
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'ENC01-722340000000-722360145000',
|
||||
'id': 'ENC01-6181EF1AD9CEEBB5C1258A6D006452B5-722340000000-722360145000',
|
||||
'ext': 'mp4',
|
||||
'duration': 20145,
|
||||
'title': '1. posiedzenie Sejmu X kadencji - ENC01',
|
||||
@ -54,7 +54,7 @@ class SejmIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': 'ENC30-722340000000-722360145000',
|
||||
'id': 'ENC30-6181EF1AD9CEEBB5C1258A6D006452B5-722340000000-722360145000',
|
||||
'ext': 'mp4',
|
||||
'duration': 20145,
|
||||
'title': '1. posiedzenie Sejmu X kadencji - ENC30',
|
||||
@ -62,7 +62,7 @@ class SejmIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': 'ENC31-722340000000-722360145000',
|
||||
'id': 'ENC31-6181EF1AD9CEEBB5C1258A6D006452B5-722340000000-722360145000',
|
||||
'ext': 'mp4',
|
||||
'duration': 20145,
|
||||
'title': '1. posiedzenie Sejmu X kadencji - ENC31',
|
||||
@ -70,7 +70,7 @@ class SejmIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': 'ENC32-722340000000-722360145000',
|
||||
'id': 'ENC32-6181EF1AD9CEEBB5C1258A6D006452B5-722340000000-722360145000',
|
||||
'ext': 'mp4',
|
||||
'duration': 20145,
|
||||
'title': '1. posiedzenie Sejmu X kadencji - ENC32',
|
||||
@ -79,7 +79,7 @@ class SejmIE(InfoExtractor):
|
||||
}, {
|
||||
# sign lang interpreter
|
||||
'info_dict': {
|
||||
'id': 'Migacz-ENC01-1-722340000000-722360145000',
|
||||
'id': 'Migacz-ENC01-6181EF1AD9CEEBB5C1258A6D006452B5-722340000000-722360145000',
|
||||
'ext': 'mp4',
|
||||
'duration': 20145,
|
||||
'title': '1. posiedzenie Sejmu X kadencji - Migacz-ENC01',
|
||||
@ -98,7 +98,7 @@ class SejmIE(InfoExtractor):
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'ENC08-1-503831270000-503840040000',
|
||||
'id': 'ENC08-9377A9D65518E9A5C125808E002E9FF2-503831270000-503840040000',
|
||||
'ext': 'mp4',
|
||||
'duration': 8770,
|
||||
'title': 'Debata "Lepsza Polska: obywatelska" - ENC08',
|
||||
@ -130,11 +130,15 @@ class SejmIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
# API (publicly documented) provides some metadata, and starting at 10th term, m3u8 URLs. Before then it's broken.
|
||||
# Frame provides timeframe and cameras available (including SLI; except for 7th term, where it provides a URL),
|
||||
# but is missing other necessary metadata (live_status, title).
|
||||
# Transmisje_arch JSON provides useful metadata (only place with live_status!), but not URLs/cameras.
|
||||
term, video_id = self._match_valid_url(url).group('term', 'id')
|
||||
frame = self._download_webpage(
|
||||
f'https://sejm-embed.redcdn.pl/Sejm{term}.nsf/VideoFrame.xsp/{video_id}',
|
||||
f'https://www.sejm.gov.pl/Sejm{term}.nsf/VideoFrame.xsp/{video_id}',
|
||||
video_id)
|
||||
# despite it says "transmisje_arch", it works for live streams too!
|
||||
player_config = self._search_json(r'var\splayerConfig\s*=\s*', frame, 'player config', video_id, transform_source=js_to_json)
|
||||
data = self._download_json(
|
||||
f'https://www.sejm.gov.pl/Sejm{term}.nsf/transmisje_arch.xsp/json/{video_id}',
|
||||
video_id)
|
||||
@ -162,49 +166,35 @@ def _real_extract(self, url):
|
||||
|
||||
entries = []
|
||||
|
||||
def add_entry(file, legacy_file=False):
|
||||
if not file:
|
||||
return
|
||||
file = self._proto_relative_url(file)
|
||||
if not legacy_file:
|
||||
file = update_url_query(file, {'startTime': start_time})
|
||||
if stop_time is not None:
|
||||
file = update_url_query(file, {'stopTime': stop_time})
|
||||
stream_id = self._search_regex(r'/o2/sejm/([^/]+)/[^./]+\.livx', file, 'stream id')
|
||||
common_info = {
|
||||
'url': file,
|
||||
'duration': duration,
|
||||
}
|
||||
if legacy_file:
|
||||
def add_entry(camera):
|
||||
if player_config.get('isMP4'):
|
||||
# Special case in 7th term. Instead of a camera name, this is a URL to a simple MP4 file.
|
||||
entries.append({
|
||||
**common_info,
|
||||
'url': self._proto_relative_url(camera),
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'duration': duration,
|
||||
'live_status': live_status,
|
||||
})
|
||||
else:
|
||||
entries.append({
|
||||
**common_info,
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': RedCDNLivxIE.ie_key(),
|
||||
'id': stream_id,
|
||||
'title': join_nonempty(title, stream_id, delim=' - '),
|
||||
})
|
||||
return
|
||||
url = f'https://sejm.c.blueonline.tv/stream/{camera}/{video_id}/manifest.mpd?start={start_time}'
|
||||
if stop_time is not None:
|
||||
url = update_url_query(url, {'stop': stop_time})
|
||||
entries.append({
|
||||
'_type': 'url_transparent',
|
||||
'url': url,
|
||||
'ie_key': SejmBlueonlineIE.ie_key(),
|
||||
'id': camera,
|
||||
'duration': duration,
|
||||
'title': join_nonempty(title, camera, delim=' - '),
|
||||
'live_status': live_status,
|
||||
})
|
||||
|
||||
cameras = self._search_json(
|
||||
r'var\s+cameras\s*=', frame, 'camera list', video_id,
|
||||
contains_pattern=r'\[(?s:.+)\]', transform_source=js_to_json,
|
||||
fatal=False) or []
|
||||
for camera_file in traverse_obj(cameras, (..., 'file', {dict})):
|
||||
if camera_file.get('flv'):
|
||||
add_entry(camera_file['flv'])
|
||||
elif camera_file.get('mp4'):
|
||||
# this is only a thing in 7th term. no streams before, and starting 8th it's redcdn livx
|
||||
add_entry(camera_file['mp4'], legacy_file=True)
|
||||
else:
|
||||
self.report_warning('Unknown camera stream type found')
|
||||
for camera in player_config['cameras']:
|
||||
add_entry(camera)
|
||||
|
||||
if params.get('mig'):
|
||||
add_entry(self._search_regex(r"var sliUrl\s*=\s*'([^']+)'", frame, 'sign language interpreter url', fatal=False))
|
||||
add_entry(player_config['sli'])
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
@ -216,3 +206,31 @@ def add_entry(file, legacy_file=False):
|
||||
'live_status': live_status,
|
||||
'location': strip_or_none(data.get('location')),
|
||||
}
|
||||
|
||||
|
||||
class SejmBlueonlineIE(InfoExtractor):
|
||||
IE_DESC = False
|
||||
_VALID_URL = r'https?://sejm\.c\.blueonline\.tv//?stream/(?P<camera>[\dA-Za-z-]+)/(?P<id>[\dA-F]+)/(?:playlist.m3u8|manifest.mpd)\?'
|
||||
_TESTS = [{
|
||||
'url': 'https://sejm.c.blueonline.tv/stream/Migacz-ENC01/6181EF1AD9CEEBB5C1258A6D006452B5/manifest.mpd?start=722340000000&stop=722360145000',
|
||||
'info_dict': {
|
||||
'id': 'Migacz-ENC01-6181EF1AD9CEEBB5C1258A6D006452B5-722340000000-722360145000',
|
||||
'ext': 'mp4',
|
||||
'title': '_',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
camera, video_id = self._match_valid_url(url).group('camera', 'id')
|
||||
qs = urllib.parse.urlparse(url).query
|
||||
query = urllib.parse.parse_qs(qs)
|
||||
start_time = try_get(query, lambda q: q['start'][0])
|
||||
stop_time = try_get(query, lambda q: q['stop'][0])
|
||||
formats = []
|
||||
formats.extend(self._extract_m3u8_formats(f'https://sejm.c.blueonline.tv/stream/{camera}/{video_id}/playlist.m3u8?{qs}', video_id, live=stop_time is None))
|
||||
formats.extend(self._extract_mpd_formats(f'https://sejm.c.blueonline.tv/stream/{camera}/{video_id}/manifest.mpd?{qs}', video_id))
|
||||
return {
|
||||
'id': join_nonempty(camera, video_id, start_time, stop_time),
|
||||
'title': '_',
|
||||
'formats': formats,
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user