1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-08-15 08:58:28 +00:00

Merge branch 'master' into generic_tests

This commit is contained in:
doe1080 2025-07-27 01:04:04 +09:00 committed by GitHub
commit 327fa5cc13
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 434 additions and 430 deletions

View File

@ -1373,6 +1373,7 @@ def test_parse_resolution(self):
self.assertEqual(parse_resolution('pre_1920x1080_post'), {'width': 1920, 'height': 1080})
self.assertEqual(parse_resolution('ep1x2'), {})
self.assertEqual(parse_resolution('1920, 1080'), {'width': 1920, 'height': 1080})
self.assertEqual(parse_resolution('1920w', lenient=True), {'width': 1920})
def test_parse_bitrate(self):
self.assertEqual(parse_bitrate(None), None)

View File

@ -572,7 +572,21 @@ def _call_downloader(self, tmpfilename, info_dict):
if end_time:
args += ['-t', str(end_time - start_time)]
args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', fmt['url']]
url = fmt['url']
if self.params.get('enable_file_urls') and url.startswith('file:'):
# The default protocol_whitelist is 'file,crypto,data' when reading local m3u8 URLs,
# so only local segments can be read unless we also include 'http,https,tcp,tls'
args += ['-protocol_whitelist', 'file,crypto,data,http,https,tcp,tls']
# ffmpeg incorrectly handles 'file:' URLs by only removing the
# 'file:' prefix and treating the rest as if it's a normal filepath.
# FFmpegPostProcessor also depends on this behavior, so we need to fixup the URLs:
# - On Windows/Cygwin, replace 'file:///' and 'file://localhost/' with 'file:'
# - On *nix, replace 'file://localhost/' with 'file:/'
# Ref: https://github.com/yt-dlp/yt-dlp/issues/13781
# https://trac.ffmpeg.org/ticket/2702
url = re.sub(r'^file://(?:localhost)?/', 'file:' if os.name == 'nt' else 'file:/', url)
args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', url]
if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'):
args += ['-c', 'copy']

View File

@ -571,10 +571,6 @@
DWIE,
DWArticleIE,
)
from .eagleplatform import (
ClipYouEmbedIE,
EaglePlatformIE,
)
from .ebaumsworld import EbaumsWorldIE
from .ebay import EbayIE
from .egghead import (
@ -640,6 +636,7 @@
FancodeVodIE,
)
from .fathom import FathomIE
from .faulio import FaulioLiveIE
from .faz import FazIE
from .fc2 import (
FC2IE,
@ -1568,6 +1565,7 @@
)
from .plutotv import PlutoTVIE
from .plvideo import PlVideoIE
from .plyr import PlyrEmbedIE
from .podbayfm import (
PodbayFMChannelIE,
PodbayFMIE,
@ -2166,7 +2164,6 @@
from .trueid import TrueIDIE
from .trunews import TruNewsIE
from .truth import TruthIE
from .trutv import TruTVIE
from .tube8 import Tube8IE
from .tubetugraz import (
TubeTuGrazIE,

View File

@ -1,215 +0,0 @@
import functools
import re
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
int_or_none,
smuggle_url,
unsmuggle_url,
url_or_none,
)
class EaglePlatformIE(InfoExtractor):
    # Handles EaglePlatform player URLs as well as the internal
    # "eagleplatform:<host>:<id>" scheme emitted by the embed detection below.
    _VALID_URL = r'''(?x)
                    (?:
                        eagleplatform:(?P<custom_host>[^/]+):|
                        https?://(?P<host>.+?\.media\.eagleplatform\.com)/index/player\?.*\brecord_id=
                    )
                    (?P<id>\d+)
                '''
    _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1']
    _TESTS = [{
        # http://lenta.ru/news/2015/03/06/navalny/
        'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201',
        # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
        'info_dict': {
            'id': '227304',
            'ext': 'mp4',
            'title': 'Навальный вышел на свободу',
            'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 87,
            'view_count': int,
            'age_limit': 0,
        },
    }, {
        # http://muz-tv.ru/play/7129/
        # http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true
        'url': 'eagleplatform:media.clipyou.ru:12820',
        'md5': '358597369cf8ba56675c1df15e7af624',
        'info_dict': {
            'id': '12820',
            'ext': 'mp4',
            'title': "'O Sole Mio",
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 216,
            'view_count': int,
        },
        'skip': 'Georestricted',
    }, {
        # referrer protected video (https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/)
        'url': 'eagleplatform:tvrainru.media.eagleplatform.com:582306',
        'only_matching': True,
    }]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Find EaglePlatform embeds in *webpage*.

        Every result is smuggled with the embedding page URL as ``referrer``.
        Returns an iterable of URLs, or None when nothing was found.
        """
        with_referrer = functools.partial(smuggle_url, data={'referrer': url})

        # Plain <iframe> embeds matched through _EMBED_REGEX take precedence
        iframe_urls = tuple(super()._extract_embed_urls(url, webpage))
        if iframe_urls:
            return map(with_referrer, iframe_urls)

        player_js_re = r'''
                    <script[^>]+
                        src=(?P<qjs>["\'])(?:https?:)?//(?P<host>(?:(?!(?P=qjs)).)+\.media\.eagleplatform\.com)/player/player\.js(?P=qjs)
                    .+?
        '''
        # "Basic usage" embedding (see http://dultonmedia.github.io/eplayer/)
        basic_usage_re = rf'''(?xs)
                    {player_js_re}
                    <div[^>]+
                        class=(?P<qclass>["\'])eagleplayer(?P=qclass)[^>]+
                        data-id=["\'](?P<id>\d+)
        '''
        # Generalization of "Javascript code usage", "Combined usage" and
        # "Usage without attaching to DOM" embeddings (see
        # http://dultonmedia.github.io/eplayer/)
        js_usage_re = r'''(?xs)
                    %s
                    <script>
                    .+?
                    new\s+EaglePlayer\(
                        (?:[^,]+\s*,\s*)?
                        {
                            .+?
                            \bid\s*:\s*["\']?(?P<id>\d+)
                            .+?
                        }
                    \s*\)
                    .+?
                    </script>
        ''' % player_js_re  # noqa: UP031

        # The patterns are tried in order; the first hit wins
        for pattern in (basic_usage_re, js_usage_re):
            found = re.search(pattern, webpage)
            if found is not None:
                return [with_referrer('eagleplatform:{host}:{id}'.format(**found.groupdict()))]
        return None

    @staticmethod
    def _handle_error(response):
        """Raise an expected ExtractorError if *response* carries a non-200 ``status``."""
        if int_or_none(response.get('status', 200)) == 200:
            return
        raise ExtractorError(' '.join(response['errors']), expected=True)

    def _download_json(self, url_or_request, video_id, *args, **kwargs):
        """Download JSON, surfacing API error messages carried in HTTP error bodies."""
        try:
            return super()._download_json(
                url_or_request, video_id, *args, **kwargs)
        except ExtractorError as err:
            if not isinstance(err.cause, HTTPError):
                raise
            # The error body is itself JSON; report its message if it has one,
            # otherwise fall through and re-raise the original error
            error_body = err.cause.response.read().decode('utf-8')
            self._handle_error(self._parse_json(error_body, video_id))
            raise

    def _get_video_url(self, url_or_request, video_id, note='Downloading JSON metadata'):
        """Return the first entry of the ``data`` list of the downloaded JSON."""
        payload = self._download_json(url_or_request, video_id, note)
        return payload['data'][0]

    def _real_extract(self, url):
        url, smuggled = unsmuggle_url(url, {})
        match = self._match_valid_url(url)
        video_id = match.group('id')
        host = match.group('custom_host') or match.group('host')

        query = {'id': video_id}
        headers = {}
        referrer = smuggled.get('referrer')
        if referrer:
            # The referrer is sent both as a header and as a query parameter
            query['referrer'] = referrer
            headers['Referer'] = referrer

        player_data = self._download_json(
            f'http://{host}/api/player_data', video_id,
            headers=headers, query=query)

        media = player_data['data']['playlist']['viewports'][0]['medialist'][0]

        title = media['title']
        description = media.get('description')
        thumbnail = self._proto_relative_url(media.get('snapshot'), 'http:')
        duration = int_or_none(media.get('duration'))
        view_count = int_or_none(media.get('views'))

        age_restriction = media.get('age_restriction')
        if not age_restriction:
            age_limit = None
        elif age_restriction == 'allow_all':
            age_limit = 0
        else:
            age_limit = 18

        secure_m3u8 = self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:')

        hls_manifest_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON')
        hls_formats = self._extract_m3u8_formats(
            hls_manifest_url, video_id, 'mp4', entry_protocol='m3u8_native',
            m3u8_id='hls', fatal=False)
        formats = list(hls_formats)

        # HLS formats keyed by height so that HTTP variants can inherit their metadata
        hls_by_height = {
            fmt['height']: fmt
            for fmt in hls_formats
            if fmt.get('height') is not None
        }

        mp4_data = self._download_json(
            # Secure mp4 URL is constructed according to Player.prototype.mp4 from
            # http://lentaru.media.eagleplatform.com/player/player.js
            re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4s', secure_m3u8),
            video_id, 'Downloading mp4 JSON', fatal=False)

        for format_id, format_url in (mp4_data or {}).get('data', {}).items():
            if not url_or_none(format_url):
                continue
            height = int_or_none(format_id)
            hls_twin = hls_by_height.get(height) if height is not None else None
            if hls_twin:
                http_format = {
                    **hls_twin,
                    'format_id': hls_twin['format_id'].replace('hls', 'http'),
                    'protocol': 'http',
                }
            else:
                http_format = {
                    'format_id': f'http-{format_id}',
                    'height': height,
                }
            http_format['url'] = format_url
            formats.append(http_format)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'view_count': view_count,
            'age_limit': age_limit,
            'formats': formats,
        }
class ClipYouEmbedIE(InfoExtractor):
    # Embed-only extractor: it never matches a URL directly
    _VALID_URL = False

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield an ``eagleplatform:`` URL for the first media.clipyou.ru iframe in *webpage*.

        The yielded URL is smuggled with the embedding page URL as ``referrer``.
        """
        found = re.search(
            r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
        if found is None:
            return
        eagle_url = 'eagleplatform:{host}:{id}'.format(**found.groupdict())
        yield smuggle_url(eagle_url, {'referrer': url})

View File

@ -0,0 +1,92 @@
import re
import urllib.parse
from .common import InfoExtractor
from ..utils import js_to_json, url_or_none
from ..utils.traversal import traverse_obj
class FaulioLiveIE(InfoExtractor):
    # Hostnames accepted by _VALID_URL
    _DOMAINS = (
        'aloula.sba.sa',
        'maraya.sba.net.ae',
        'sat7plus.org',
    )
    _VALID_URL = fr'https?://(?:{"|".join(map(re.escape, _DOMAINS))})/(?:(?:en|ar|fa)/)?live/(?P<id>[a-zA-Z0-9-]+)'
    _TESTS = [{
        'url': 'https://aloula.sba.sa/live/saudiatv',
        'info_dict': {
            'id': 'aloula.faulio.com_saudiatv',
            'title': str,
            'description': str,
            'ext': 'mp4',
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://maraya.sba.net.ae/live/1',
        'info_dict': {
            'id': 'maraya.faulio.com_1',
            'title': str,
            'description': str,
            'ext': 'mp4',
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://sat7plus.org/live/pars',
        'info_dict': {
            'id': 'sat7.faulio.com_pars',
            'title': str,
            'description': str,
            'ext': 'mp4',
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://sat7plus.org/fa/live/arabic',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        channel_slug = self._match_id(url)
        page = self._download_webpage(url, channel_slug)

        # The API base URL is read from the config object embedded in the page
        nuxt_config = self._search_json(
            r'window\.__NUXT__\.config=', page, 'config', channel_slug,
            transform_source=js_to_json)
        api_base = nuxt_config['public']['TRANSLATIONS_API_URL']

        # Pick the channel entry whose 'url' field equals the slug from the page URL
        all_channels = self._download_json(f'{api_base}/channels', channel_slug)
        channel = traverse_obj(
            all_channels, (lambda _, entry: entry['url'] == channel_slug, any))

        formats, subtitles = [], {}

        hls_url = traverse_obj(channel, ('streams', 'hls', {url_or_none}))
        if hls_url:
            hls_formats, hls_subtitles = self._extract_m3u8_formats_and_subtitles(
                hls_url, channel_slug, 'mp4', m3u8_id='hls', live=True, fatal=False)
            formats.extend(hls_formats)
            self._merge_subtitles(hls_subtitles, target=subtitles)

        mpd_url = traverse_obj(channel, ('streams', 'mpd', {url_or_none}))
        if mpd_url:
            dash_formats, dash_subtitles = self._extract_mpd_formats_and_subtitles(
                mpd_url, channel_slug, mpd_id='dash', fatal=False)
            formats.extend(dash_formats)
            self._merge_subtitles(dash_subtitles, target=subtitles)

        metadata = traverse_obj(channel, {
            'title': ('title', {str}),
            'description': ('description', {str}),
        })
        # The ID is prefixed with the API hostname (see the expected IDs in _TESTS)
        api_host = urllib.parse.urlparse(api_base).hostname

        return {
            'id': f'{api_host}_{channel_slug}',
            **metadata,
            'formats': formats,
            'subtitles': subtitles,
            'is_live': True,
        }

View File

@ -3,6 +3,7 @@
class LiveJournalIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:[^.]+\.)?livejournal\.com/video/album/\d+.+?\bid=(?P<id>\d+)'
_TEST = {
'url': 'https://andrei-bt.livejournal.com/video/album/407/?mode=view&id=51272',

View File

@ -1,63 +1,63 @@
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
try_get,
unified_timestamp,
)
from ..utils import parse_duration, parse_iso8601, url_or_none
from ..utils.traversal import traverse_obj
class ParlviewIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?parlview\.aph\.gov\.au/(?:[^/]+)?\bvideoID=(?P<id>\d{6})'
_VALID_URL = r'https?://(?:www\.)?aph\.gov\.au/News_and_Events/Watch_Read_Listen/ParlView/video/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://parlview.aph.gov.au/mediaPlayer.php?videoID=542661',
'url': 'https://www.aph.gov.au/News_and_Events/Watch_Read_Listen/ParlView/video/3406614',
'info_dict': {
'id': '542661',
'id': '3406614',
'ext': 'mp4',
'title': "Australia's Family Law System [Part 2]",
'duration': 5799,
'description': 'md5:7099883b391619dbae435891ca871a62',
'timestamp': 1621430700,
'upload_date': '20210519',
'uploader': 'Joint Committee',
'title': 'Senate Chamber',
'description': 'Official Recording of Senate Proceedings from the Australian Parliament',
'thumbnail': 'https://aphbroadcasting-prod.z01.azurefd.net/vod-storage/vod-logos/SenateParlview06.jpg',
'upload_date': '20250325',
'duration': 17999,
'timestamp': 1742939400,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://parlview.aph.gov.au/mediaPlayer.php?videoID=539936',
'only_matching': True,
'url': 'https://www.aph.gov.au/News_and_Events/Watch_Read_Listen/ParlView/video/SV1394.dv',
'info_dict': {
'id': 'SV1394.dv',
'ext': 'mp4',
'title': 'Senate Select Committee on Uranium Mining and Milling [Part 1]',
'description': 'Official Recording of Senate Committee Proceedings from the Australian Parliament',
'thumbnail': 'https://aphbroadcasting-prod.z01.azurefd.net/vod-storage/vod-logos/CommitteeThumbnail06.jpg',
'upload_date': '19960822',
'duration': 14765,
'timestamp': 840754200,
},
'params': {
'skip_download': True,
},
}]
_API_URL = 'https://parlview.aph.gov.au/api_v3/1/playback/getUniversalPlayerConfig?videoID=%s&format=json'
_MEDIA_INFO_URL = 'https://parlview.aph.gov.au/ajaxPlayer.php?videoID=%s&tabNum=4&action=loadTab'
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
media = self._download_json(self._API_URL % video_id, video_id).get('media')
timestamp = try_get(media, lambda x: x['timeMap']['source']['timecode_offsets'][0], str) or '/'
video_details = self._download_json(
f'https://vodapi.aph.gov.au/api/search/parlview/{video_id}', video_id)['videoDetails']
stream = try_get(media, lambda x: x['renditions'][0], dict)
if not stream:
self.raise_no_formats('No streams were detected')
elif stream.get('streamType') != 'VOD':
self.raise_no_formats('Unknown type of stream was detected: "{}"'.format(str(stream.get('streamType'))))
formats = self._extract_m3u8_formats(stream['url'], video_id, 'mp4', 'm3u8_native')
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
video_details['files']['file']['url'], video_id, 'mp4')
media_info = self._download_webpage(
self._MEDIA_INFO_URL % video_id, video_id, note='Downloading media info', fatal=False)
DURATION_RE = re.compile(r'(?P<duration>\d+:\d+:\d+):\d+')
return {
'id': video_id,
'url': url,
'title': self._html_search_regex(r'<h2>([^<]+)<', webpage, 'title', fatal=False),
'formats': formats,
'duration': int_or_none(media.get('duration')),
'timestamp': unified_timestamp(timestamp.split('/', 1)[1].replace('_', ' ')),
'description': self._html_search_regex(
r'<div[^>]+class="descripti?on"[^>]*>[^>]+<strong>[^>]+>[^>]+>([^<]+)',
webpage, 'description', fatal=False),
'uploader': self._html_search_regex(
r'<td>[^>]+>Channel:[^>]+>([^<]+)', media_info, 'channel', fatal=False),
'thumbnail': media.get('staticImage'),
'subtitles': subtitles,
**traverse_obj(video_details, {
'title': (('parlViewTitle', 'title'), {str}, any),
'description': ('parlViewDescription', {str}),
'duration': ('files', 'file', 'duration', {DURATION_RE.fullmatch}, 'duration', {parse_duration}),
'timestamp': ('recordingFrom', {parse_iso8601}),
'thumbnail': ('thumbUrl', {url_or_none}),
}),
}

104
yt_dlp/extractor/plyr.py Normal file
View File

@ -0,0 +1,104 @@
import re
from .common import InfoExtractor
from .vimeo import VimeoIE
class PlyrEmbedIE(InfoExtractor):
    # Embed-only extractor: it never matches a URL directly
    _VALID_URL = False
    _WEBPAGE_TESTS = [{
        # data-plyr-embed-id="https://player.vimeo.com/video/522319456/90e5c96063?dnt=1"
        'url': 'https://www.dhm.de/zeughauskino/filmreihen/online-filmreihen/filme-des-marshall-plans/200000000-mouths/',
        'info_dict': {
            'id': '522319456',
            'ext': 'mp4',
            'title': '200.000.000 Mouths (195051)',
            'uploader': 'Zeughauskino',
            'uploader_url': '',
            'comment_count': int,
            'like_count': int,
            'duration': 963,
            'thumbnail': 'https://i.vimeocdn.com/video/1081797161-9f09ddb4b7faa86e834e006b8e4b9c2cbaa0baa7da493211bf0796ae133a5ab8-d',
            'timestamp': 1615467405,
            'upload_date': '20210311',
            'release_timestamp': 1615467405,
            'release_date': '20210311',
        },
        'params': {'skip_download': 'm3u8'},
        'expected_warnings': ['Failed to parse XML: not well-formed'],
    }, {
        # data-plyr-provider="vimeo" data-plyr-embed-id="803435276"
        'url': 'https://www.inarcassa.it/',
        'info_dict': {
            'id': '803435276',
            'ext': 'mp4',
            'title': 'HOME_Moto_Perpetuo',
            'uploader': 'Inarcassa',
            'uploader_url': '',
            'duration': 38,
            'thumbnail': 'https://i.vimeocdn.com/video/1663734769-945ad7ffabb16dbca009c023fd1d7b36bdb426a3dbae8345ed758136fe28f89a-d',
        },
        'params': {'skip_download': 'm3u8'},
        'expected_warnings': ['Failed to parse XML: not well-formed'],
    }, {
        # data-plyr-embed-id="https://youtu.be/GF-BjYKoAqI"
        'url': 'https://www.profile.nl',
        'info_dict': {
            'id': 'GF-BjYKoAqI',
            'ext': 'mp4',
            'title': 'PROFILE: Recruitment Profile',
            'description': '',
            'media_type': 'video',
            'uploader': 'Profile Nederland',
            'uploader_id': '@profilenederland',
            'uploader_url': 'https://www.youtube.com/@profilenederland',
            'channel': 'Profile Nederland',
            'channel_id': 'UC9AUkB0Tv39-TBYjs05n3vg',
            'channel_url': 'https://www.youtube.com/channel/UC9AUkB0Tv39-TBYjs05n3vg',
            'channel_follower_count': int,
            'view_count': int,
            'like_count': int,
            'age_limit': 0,
            'duration': 39,
            'thumbnail': 'https://i.ytimg.com/vi/GF-BjYKoAqI/maxresdefault.jpg',
            'categories': ['Autos & Vehicles'],
            'tags': [],
            'timestamp': 1675692990,
            'upload_date': '20230206',
            'playable_in_embed': True,
            'availability': 'public',
            'live_status': 'not_live',
        },
    }, {
        # data-plyr-embed-id="B1TZV8rNZoc" data-plyr-provider="youtube"
        'url': 'https://www.vnis.edu.vn',
        'info_dict': {
            'id': 'vnis.edu',
            'title': 'VNIS Education - Master Agent các Trường hàng đầu Bắc Mỹ',
            'description': 'md5:4dafcf7335bb018780e4426da8ab8e4e',
            'age_limit': 0,
            'thumbnail': 'https://vnis.edu.vn/wp-content/uploads/2021/05/ve-welcome-en.png',
            'timestamp': 1753233356,
            'upload_date': '20250723',
        },
        'playlist_count': 3,
    }]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield Vimeo/YouTube URLs for Plyr ``<div>`` embeds found in *webpage*.

        A ``<div>`` is only considered when it carries both ``data-plyr-embed-id``
        and ``data-plyr-provider`` (in either order). Bare IDs are expanded to
        full URLs; Vimeo URLs are passed through ``VimeoIE._smuggle_referrer``
        together with the page URL. Other providers are ignored.
        """
        for match in re.finditer(r'''(?x)
            <div[^>]+(?:
                data-plyr-embed-id="(?P<id1>[^"]+)"[^>]+data-plyr-provider="(?P<provider1>[^"]+)"|
                data-plyr-provider="(?P<provider2>[^"]+)"[^>]+data-plyr-embed-id="(?P<id2>[^"]+)"
            )[^>]*>''', webpage):
            # Exactly one alternative matched, so one of each pair of groups is set
            provider = match.group('provider1') or match.group('provider2')
            embed_id = match.group('id1') or match.group('id2')
            is_full_url = re.match(r'https?://', embed_id)

            if provider == 'vimeo':
                vimeo_url = embed_id if is_full_url else f'https://player.vimeo.com/video/{embed_id}'
                yield VimeoIE._smuggle_referrer(vimeo_url, url)
            elif provider == 'youtube':
                yield embed_id if is_full_url else f'https://youtube.com/watch?v={embed_id}'

View File

@ -8,84 +8,9 @@
class SportDeutschlandIE(InfoExtractor):
_VALID_URL = r'https?://sportdeutschland\.tv/(?P<id>(?:[^/]+/)?[^?#/&]+)'
_VALID_URL = r'https?://(?:player\.)?sportdeutschland\.tv/(?P<id>(?:[^/?#]+/)?[^?#/&]+)'
_TESTS = [{
'url': 'https://sportdeutschland.tv/blauweissbuchholztanzsport/buchholzer-formationswochenende-2023-samstag-1-bundesliga-landesliga',
'info_dict': {
'id': '9839a5c7-0dbb-48a8-ab63-3b408adc7b54',
'ext': 'mp4',
'title': 'Buchholzer Formationswochenende 2023 - Samstag - 1. Bundesliga / Landesliga',
'display_id': 'blauweissbuchholztanzsport/buchholzer-formationswochenende-2023-samstag-1-bundesliga-landesliga',
'description': 'md5:a288c794a5ee69e200d8f12982f81a87',
'live_status': 'was_live',
'channel': 'Blau-Weiss Buchholz Tanzsport',
'channel_url': 'https://sportdeutschland.tv/blauweissbuchholztanzsport',
'channel_id': '93ec33c9-48be-43b6-b404-e016b64fdfa3',
'duration': 32447,
'upload_date': '20230114',
'timestamp': 1673733618,
},
'skip': 'not found',
}, {
'url': 'https://sportdeutschland.tv/deutscherbadmintonverband/bwf-tour-1-runde-feld-1-yonex-gainward-german-open-2022-0',
'info_dict': {
'id': '95c80c52-6b9a-4ae9-9197-984145adfced',
'ext': 'mp4',
'title': 'BWF Tour: 1. Runde Feld 1 - YONEX GAINWARD German Open 2022',
'display_id': 'deutscherbadmintonverband/bwf-tour-1-runde-feld-1-yonex-gainward-german-open-2022-0',
'description': 'md5:2afb5996ceb9ac0b2ac81f563d3a883e',
'live_status': 'was_live',
'channel': 'Deutscher Badminton Verband',
'channel_url': 'https://sportdeutschland.tv/deutscherbadmintonverband',
'channel_id': '93ca5866-2551-49fc-8424-6db35af58920',
'duration': 41097,
'upload_date': '20220309',
'timestamp': 1646860727.0,
},
'skip': 'not found',
}, {
'url': 'https://sportdeutschland.tv/ggcbremen/formationswochenende-latein-2023',
'info_dict': {
'id': '9889785e-55b0-4d97-a72a-ce9a9f157cce',
'title': 'Formationswochenende Latein 2023 - Samstag',
'display_id': 'ggcbremen/formationswochenende-latein-2023',
'description': 'md5:6e4060d40ff6a8f8eeb471b51a8f08b2',
'live_status': 'was_live',
'channel': 'Grün-Gold-Club Bremen e.V.',
'channel_id': '9888f04e-bb46-4c7f-be47-df960a4167bb',
'channel_url': 'https://sportdeutschland.tv/ggcbremen',
},
'playlist_count': 3,
'playlist': [{
'info_dict': {
'id': '988e1fea-9d44-4fab-8c72-3085fb667547',
'ext': 'mp4',
'channel_url': 'https://sportdeutschland.tv/ggcbremen',
'channel_id': '9888f04e-bb46-4c7f-be47-df960a4167bb',
'channel': 'Grün-Gold-Club Bremen e.V.',
'duration': 86,
'title': 'Formationswochenende Latein 2023 - Samstag Part 1',
'upload_date': '20230225',
'timestamp': 1677349909,
'live_status': 'was_live',
},
}],
'skip': 'not found',
}, {
'url': 'https://sportdeutschland.tv/dtb/gymnastik-international-tag-1',
'info_dict': {
'id': '95d71b8a-370a-4b87-ad16-94680da18528',
'ext': 'mp4',
'title': r're:Gymnastik International - Tag 1 .+',
'display_id': 'dtb/gymnastik-international-tag-1',
'channel_id': '936ecef1-2f4a-4e08-be2f-68073cb7ecab',
'channel': 'Deutscher Turner-Bund',
'channel_url': 'https://sportdeutschland.tv/dtb',
'description': 'md5:07a885dde5838a6f0796ee21dc3b0c52',
'live_status': 'is_live',
},
'skip': 'live',
}, {
# Single-part video, direct link
'url': 'https://sportdeutschland.tv/rostock-griffins/gfl2-rostock-griffins-vs-elmshorn-fighting-pirates',
'md5': '35c11a19395c938cdd076b93bda54cde',
'info_dict': {
@ -100,7 +25,82 @@ class SportDeutschlandIE(InfoExtractor):
'channel_id': '9635f21c-3f67-4584-9ce4-796e9a47276b',
'timestamp': 1749913117,
'upload_date': '20250614',
'duration': 12287.0,
},
}, {
# Single-part video, embedded player link
'url': 'https://player.sportdeutschland.tv/9e9619c4-7d77-43c4-926d-49fb57dc06dc',
'info_dict': {
'id': '9f27a97d-1544-4d0b-aa03-48d92d17a03a',
'ext': 'mp4',
'title': 'GFL2: Rostock Griffins vs. Elmshorn Fighting Pirates',
'display_id': '9e9619c4-7d77-43c4-926d-49fb57dc06dc',
'channel': 'Rostock Griffins',
'channel_url': 'https://sportdeutschland.tv/rostock-griffins',
'live_status': 'was_live',
'description': 'md5:60cb00067e55dafa27b0933a43d72862',
'channel_id': '9635f21c-3f67-4584-9ce4-796e9a47276b',
'timestamp': 1749913117,
'upload_date': '20250614',
'duration': 12287.0,
},
'params': {'skip_download': True},
}, {
# Multi-part video
'url': 'https://sportdeutschland.tv/rhine-ruhr-2025-fisu-world-university-games/volleyball-w-japan-vs-brasilien-halbfinale-2',
'info_dict': {
'id': '9f63d737-2444-4e3a-a1ea-840df73fd481',
'display_id': 'rhine-ruhr-2025-fisu-world-university-games/volleyball-w-japan-vs-brasilien-halbfinale-2',
'title': 'Volleyball w: Japan vs. Braslien - Halbfinale 2',
'description': 'md5:0a17da15e48a687e6019639c3452572b',
'channel': 'Rhine-Ruhr 2025 FISU World University Games',
'channel_id': '9f5216be-a49d-470b-9a30-4fe9df993334',
'channel_url': 'https://sportdeutschland.tv/rhine-ruhr-2025-fisu-world-university-games',
'live_status': 'was_live',
},
'playlist_count': 2,
'playlist': [{
'info_dict': {
'id': '9f725a94-d43e-40ff-859d-13da3081bb04',
'ext': 'mp4',
'title': 'Volleyball w: Japan vs. Braslien - Halbfinale 2 Part 1',
'channel': 'Rhine-Ruhr 2025 FISU World University Games',
'channel_id': '9f5216be-a49d-470b-9a30-4fe9df993334',
'channel_url': 'https://sportdeutschland.tv/rhine-ruhr-2025-fisu-world-university-games',
'duration': 14773.0,
'timestamp': 1753085197,
'upload_date': '20250721',
'live_status': 'was_live',
},
}, {
'info_dict': {
'id': '9f725a94-370e-4477-89ac-1751098e3217',
'ext': 'mp4',
'title': 'Volleyball w: Japan vs. Braslien - Halbfinale 2 Part 2',
'channel': 'Rhine-Ruhr 2025 FISU World University Games',
'channel_id': '9f5216be-a49d-470b-9a30-4fe9df993334',
'channel_url': 'https://sportdeutschland.tv/rhine-ruhr-2025-fisu-world-university-games',
'duration': 14773.0,
'timestamp': 1753128421,
'upload_date': '20250721',
'live_status': 'was_live',
},
}],
}, {
# Livestream
'url': 'https://sportdeutschland.tv/dtb/gymnastik-international-tag-1',
'info_dict': {
'id': '95d71b8a-370a-4b87-ad16-94680da18528',
'ext': 'mp4',
'title': r're:Gymnastik International - Tag 1 .+',
'display_id': 'dtb/gymnastik-international-tag-1',
'channel_id': '936ecef1-2f4a-4e08-be2f-68073cb7ecab',
'channel': 'Deutscher Turner-Bund',
'channel_url': 'https://sportdeutschland.tv/dtb',
'description': 'md5:07a885dde5838a6f0796ee21dc3b0c52',
'live_status': 'is_live',
},
'skip': 'live',
}]
def _process_video(self, asset_id, video):

View File

@ -5,45 +5,110 @@
from ..utils import (
float_or_none,
int_or_none,
make_archive_id,
strip_or_none,
)
from ..utils.traversal import traverse_obj
class TBSIE(TurnerBaseIE):
_VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com(?P<path>/(?:movies|watchtnt|watchtbs|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P<id>[^/?#]+))'
_SITE_INFO = {
'tbs': ('TBS', 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJkZTA0NTYxZS1iMTFhLTRlYTgtYTg5NC01NjI3MGM1NmM2MWIiLCJuYmYiOjE1MzcxODkzOTAsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTg5MzkwfQ.Z7ny66kaqNDdCHf9Y9KsV12LrBxrLkGGxlYe2XGm6qsw2T-k1OCKC1TMzeqiZP735292MMRAQkcJDKrMIzNbAuf9nCdIcv4kE1E2nqUnjPMBduC1bHffZp8zlllyrN2ElDwM8Vhwv_5nElLRwWGEt0Kaq6KJAMZA__WDxKWC18T-wVtsOZWXQpDqO7nByhfj2t-Z8c3TUNVsA_wHgNXlkzJCZ16F2b7yGLT5ZhLPupOScd3MXC5iPh19HSVIok22h8_F_noTmGzmMnIRQi6bWYWK2zC7TQ_MsYHfv7V6EaG5m1RKZTV6JAwwoJQF_9ByzarLV1DGwZxD9-eQdqswvg'),
'tntdrama': ('TNT', 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIwOTMxYTU4OS1jZjEzLTRmNjMtYTJmYy03MzhjMjE1NWU5NjEiLCJuYmYiOjE1MzcxOTA4MjcsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwODI3fQ.AucKvtws7oekTXi80_zX4-BlgJD9GLvlOI9FlBCjdlx7Pa3eJ0AqbogynKMiatMbnLOTMHGjd7tTiq422unmZjBz70dhePAe9BbW0dIo7oQ57vZ-VBYw_tWYRPmON61MwAbLVlqROD3n_zURs85S8TlkQx9aNx9x_riGGELjd8l05CVa_pOluNhYvuIFn6wmrASOKI1hNEblBDWh468UWP571-fe4zzi0rlYeeHd-cjvtWvOB3bQsWrUVbK4pRmqvzEH59j0vNF-ihJF9HncmUicYONe47Mib3elfMok23v4dB1_UAlQY_oawfNcynmEnJQCcqFmbHdEwTW6gMiYsA'),
'trutv': ('truTV', 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhYzQyOTkwMi0xMDYzLTQyNTQtYWJlYS1iZTY2ODM4MTVmZGIiLCJuYmYiOjE1MzcxOTA4NjgsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwODY4fQ.ewXl5LDMDvvx3nDXV4jCdSwUq_sOluKoOVsIjznAo6Zo4zrGe9rjlZ9DOmQKW66g6VRMexJsJ5vM1EkY8TC5-YcQw_BclK1FPGO1rH3Wf7tX_l0b1BVbSJQKIj9UgqDp_QbGcBXz24kN4So3U22mhs6di9PYyyfG68ccKL2iRprcVKWCslIHwUF-T7FaEqb0K57auilxeW1PONG2m-lIAcZ62DUwqXDWvw0CRoWI08aVVqkkhnXaSsQfLs5Ph1Pfh9Oq3g_epUm9Ss45mq6XM7gbOb5omTcKLADRKK-PJVB_JXnZnlsXbG0ttKE1cTKJ738qu7j4aipYTf-W0nKF5Q'),
}
_VALID_URL = fr'''(?x)
https?://(?:www\.)?(?P<site>{"|".join(map(re.escape, _SITE_INFO))})\.com
(?P<path>/(?:
(?P<watch>watch(?:tnt|tbs|trutv))|
movies|shows/[^/?#]+/(?:clips|season-\d+/episode-\d+)
)/(?P<id>[^/?#]+))
'''
_TESTS = [{
'url': 'http://www.tntdrama.com/shows/the-alienist/clips/monster',
'url': 'https://www.tbs.com/shows/american-dad/season-6/episode-12/you-debt-your-life',
'info_dict': {
'id': '8d384cde33b89f3a43ce5329de42903ed5099887',
'id': '984bdcd8db0cc00dc699927f2a411c8c6e0e48f3',
'ext': 'mp4',
'title': 'Monster',
'description': 'Get a first look at the theatrical trailer for TNTs highly anticipated new psychological thriller The Alienist, which premieres January 22 on TNT.',
'timestamp': 1508175329,
'upload_date': '20171016',
'title': 'You Debt Your Life',
'description': 'md5:f211cfeb9187fd3cdb53eb0e8930d499',
'duration': 1231.0,
'thumbnail': r're:https://images\.tbs\.com/tbs/.+\.(?:jpe?g|png)',
'chapters': 'count:4',
'season': 'Season 6',
'season_number': 6,
'episode': 'Episode 12',
'episode_number': 12,
'timestamp': 1478276239,
'upload_date': '20161104',
},
'params': {
# m3u8 download
'skip_download': True,
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.tntdrama.com/shows/the-librarians-the-next-chapter/season-1/episode-10/and-going-medieval',
'info_dict': {
'id': 'e487b31b663a8001864f62fd20907782f7b8ccb8',
'ext': 'mp4',
'title': 'And Going Medieval',
'description': 'md5:5aed0ae23a6cf148a02fe3c1be8359fa',
'duration': 2528.0,
'thumbnail': r're:https://images\.tntdrama\.com/tnt/.+\.(?:jpe?g|png)',
'chapters': 'count:7',
'season': 'Season 1',
'season_number': 1,
'episode': 'Episode 10',
'episode_number': 10,
'timestamp': 1743107520,
'upload_date': '20250327',
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.trutv.com/shows/the-carbonaro-effect/season-1/episode-1/got-the-bug-out',
'info_dict': {
'id': 'b457dd7458fd9e64b596355950b13a1ca799dc39',
'ext': 'mp4',
'title': 'Got the Bug Out',
'description': 'md5:9eeddf6248f73517b0e5969b8a43c025',
'duration': 1283.0,
'thumbnail': r're:https://images\.trutv\.com/tru/.+\.(?:jpe?g|png)',
'chapters': 'count:4',
'season': 'Season 1',
'season_number': 1,
'episode': 'Episode 1',
'episode_number': 1,
'timestamp': 1570040829,
'upload_date': '20191002',
'_old_archive_ids': ['trutv b457dd7458fd9e64b596355950b13a1ca799dc39'],
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'http://www.tntdrama.com/shows/the-alienist/clips/monster',
'only_matching': True,
}, {
'url': 'http://www.tbs.com/shows/search-party/season-1/episode-1/explicit-the-mysterious-disappearance-of-the-girl-no-one-knew',
'only_matching': True,
}, {
'url': 'http://www.tntdrama.com/movies/star-wars-a-new-hope',
'only_matching': True,
}, {
'url': 'https://www.trutv.com/shows/impractical-jokers/season-9/episode-1/you-dirty-dog',
'only_matching': True,
}, {
'url': 'https://www.trutv.com/watchtrutv/east',
'only_matching': True,
}, {
'url': 'https://www.tbs.com/watchtbs/east',
'only_matching': True,
}, {
'url': 'https://www.tntdrama.com/watchtnt/east',
'only_matching': True,
}]
_SOFTWARE_STATEMENT_MAP = {
'tbs': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJkZTA0NTYxZS1iMTFhLTRlYTgtYTg5NC01NjI3MGM1NmM2MWIiLCJuYmYiOjE1MzcxODkzOTAsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTg5MzkwfQ.Z7ny66kaqNDdCHf9Y9KsV12LrBxrLkGGxlYe2XGm6qsw2T-k1OCKC1TMzeqiZP735292MMRAQkcJDKrMIzNbAuf9nCdIcv4kE1E2nqUnjPMBduC1bHffZp8zlllyrN2ElDwM8Vhwv_5nElLRwWGEt0Kaq6KJAMZA__WDxKWC18T-wVtsOZWXQpDqO7nByhfj2t-Z8c3TUNVsA_wHgNXlkzJCZ16F2b7yGLT5ZhLPupOScd3MXC5iPh19HSVIok22h8_F_noTmGzmMnIRQi6bWYWK2zC7TQ_MsYHfv7V6EaG5m1RKZTV6JAwwoJQF_9ByzarLV1DGwZxD9-eQdqswvg',
'tntdrama': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIwOTMxYTU4OS1jZjEzLTRmNjMtYTJmYy03MzhjMjE1NWU5NjEiLCJuYmYiOjE1MzcxOTA4MjcsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwODI3fQ.AucKvtws7oekTXi80_zX4-BlgJD9GLvlOI9FlBCjdlx7Pa3eJ0AqbogynKMiatMbnLOTMHGjd7tTiq422unmZjBz70dhePAe9BbW0dIo7oQ57vZ-VBYw_tWYRPmON61MwAbLVlqROD3n_zURs85S8TlkQx9aNx9x_riGGELjd8l05CVa_pOluNhYvuIFn6wmrASOKI1hNEblBDWh468UWP571-fe4zzi0rlYeeHd-cjvtWvOB3bQsWrUVbK4pRmqvzEH59j0vNF-ihJF9HncmUicYONe47Mib3elfMok23v4dB1_UAlQY_oawfNcynmEnJQCcqFmbHdEwTW6gMiYsA',
}
def _real_extract(self, url):
site, path, display_id = self._match_valid_url(url).groups()
site, path, display_id, watch = self._match_valid_url(url).group('site', 'path', 'id', 'watch')
is_live = bool(watch)
webpage = self._download_webpage(url, display_id)
drupal_settings = self._parse_json(self._search_regex(
r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>',
webpage, 'drupal setting'), display_id)
is_live = 'watchtnt' in path or 'watchtbs' in path
drupal_settings = self._search_json(
r'<script\b[^>]+\bdata-drupal-selector="drupal-settings-json"[^>]*>',
webpage, 'drupal settings', display_id)
video_data = next(v for v in drupal_settings['turner_playlist'] if is_live or v.get('url') == path)
media_id = video_data['mediaID']
@ -51,10 +116,14 @@ def _real_extract(self, url):
tokenizer_query = urllib.parse.parse_qs(urllib.parse.urlparse(
drupal_settings['ngtv_token_url']).query)
auth_info = traverse_obj(drupal_settings, ('top2', {dict})) or {}
site_name = auth_info.get('siteName') or self._SITE_INFO[site][0]
software_statement = auth_info.get('softwareStatement') or self._SITE_INFO[site][1]
info = self._extract_ngtv_info(
media_id, tokenizer_query, self._SOFTWARE_STATEMENT_MAP[site], {
media_id, tokenizer_query, software_statement, {
'url': url,
'site_name': site[:3].upper(),
'site_name': site_name,
'auth_required': video_data.get('authRequired') == '1' or is_live,
'is_live': is_live,
})
@ -87,4 +156,6 @@ def _real_extract(self, url):
'thumbnails': thumbnails,
'is_live': is_live,
})
if site == 'trutv':
info['_old_archive_ids'] = [make_archive_id(site, media_id)]
return info

View File

@ -1,71 +0,0 @@
from .turner import TurnerBaseIE
from ..utils import (
int_or_none,
parse_iso8601,
)
class TruTVIE(TurnerBaseIE):
    # Matches trutv.com show/full-episode pages: either a clip addressed by
    # slug (`videos/<clip_slug>`) or an episode addressed by a numeric id.
    _VALID_URL = r'https?://(?:www\.)?trutv\.com/(?:shows|full-episodes)/(?P<series_slug>[0-9A-Za-z-]+)/(?:videos/(?P<clip_slug>[0-9A-Za-z-]+)|(?P<id>\d+))'
    _TEST = {
        'url': 'https://www.trutv.com/shows/the-carbonaro-effect/videos/sunlight-activated-flower.html',
        'info_dict': {
            'id': 'f16c03beec1e84cd7d1a51f11d8fcc29124cc7f1',
            'ext': 'mp4',
            'title': 'Sunlight-Activated Flower',
            'description': "A customer is stunned when he sees Michael's sunlight-activated flower.",
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }
    # Passed through unchanged to `_extract_ngtv_info` as its software statement.
    _SOFTWARE_STATEMENT = 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhYzQyOTkwMi0xMDYzLTQyNTQtYWJlYS1iZTY2ODM4MTVmZGIiLCJuYmYiOjE1MzcxOTA4NjgsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwODY4fQ.ewXl5LDMDvvx3nDXV4jCdSwUq_sOluKoOVsIjznAo6Zo4zrGe9rjlZ9DOmQKW66g6VRMexJsJ5vM1EkY8TC5-YcQw_BclK1FPGO1rH3Wf7tX_l0b1BVbSJQKIj9UgqDp_QbGcBXz24kN4So3U22mhs6di9PYyyfG68ccKL2iRprcVKWCslIHwUF-T7FaEqb0K57auilxeW1PONG2m-lIAcZ62DUwqXDWvw0CRoWI08aVVqkkhnXaSsQfLs5Ph1Pfh9Oq3g_epUm9Ss45mq6XM7gbOb5omTcKLADRKK-PJVB_JXnZnlsXbG0ttKE1cTKJ738qu7j4aipYTf-W0nKF5Q'

    def _real_extract(self, url):
        """Build the info dict for a truTV clip or episode URL.

        Metadata is read from the api.trutv.com JSON endpoint; formats and
        the remaining base fields come from `_extract_ngtv_info`.
        """
        series_slug, clip_slug, video_id = self._match_valid_url(url).groups()

        # A numeric id selects the episode endpoint; otherwise the clip slug
        # selects the series/clip endpoint.
        api_path, display_id = (
            ('episode', video_id) if video_id else ('series/clip', clip_slug))

        response = self._download_json(
            f'https://api.trutv.com/v2/web/{api_path}/{series_slug}/{display_id}',
            display_id)
        # The payload key differs between the two endpoints.
        video_data = response['episode' if video_id else 'info']

        # Mandatory fields are read before the media lookup is attempted.
        media_id = video_data['mediaId']
        title = video_data['title'].strip()

        ap_data = {
            'url': url,
            'site_name': 'truTV',
            'auth_required': video_data.get('isAuthRequired'),
        }
        info = self._extract_ngtv_info(
            media_id, {}, self._SOFTWARE_STATEMENT, ap_data)

        # Images without a source URL are dropped.
        thumbnails = [{
            'url': image['srcUrl'],
            'width': int_or_none(image.get('width')),
            'height': int_or_none(image.get('height')),
        } for image in video_data.get('images', []) if image.get('srcUrl')]

        metadata = {
            'id': media_id,
            'display_id': display_id,
            'title': title,
            'description': video_data.get('description'),
            'thumbnails': thumbnails,
            'timestamp': parse_iso8601(video_data.get('publicationDate')),
            'series': video_data.get('showTitle'),
            'season_number': int_or_none(video_data.get('seasonNum')),
            'episode_number': int_or_none(video_data.get('episodeNum')),
        }
        info.update(metadata)
        return info

View File

@ -251,6 +251,11 @@ def _extract_ngtv_info(self, media_id, tokenizer_query, software_statement, ap_d
'end_time': start_time + chapter_duration,
})
if is_live:
for f in formats:
# Prevent ffmpeg from adding its own http headers or else we get HTTP Error 403
f['downloader_options'] = {'ffmpeg_args': ['-seekable', '0', '-icy', '0']}
return {
'formats': formats,
'chapters': chapters,

View File

@ -1875,6 +1875,11 @@ def parse_resolution(s, *, lenient=False):
if mobj:
return {'height': int(mobj.group(1)) * 540}
if lenient:
mobj = re.search(r'(?<!\d)(\d{2,5})w(?![a-zA-Z0-9])', s)
if mobj:
return {'width': int(mobj.group(1))}
return {}