mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-02-08 06:57:27 +00:00
Compare commits
6 Commits
2026.02.04
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
637ae202ac | ||
|
|
23c059a455 | ||
|
|
6f38df31b4 | ||
|
|
442c90da3e | ||
|
|
133cb959be | ||
|
|
c7c45f5289 |
@@ -924,6 +924,7 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(month_by_name(None), None)
|
self.assertEqual(month_by_name(None), None)
|
||||||
self.assertEqual(month_by_name('December', 'en'), 12)
|
self.assertEqual(month_by_name('December', 'en'), 12)
|
||||||
self.assertEqual(month_by_name('décembre', 'fr'), 12)
|
self.assertEqual(month_by_name('décembre', 'fr'), 12)
|
||||||
|
self.assertEqual(month_by_name('desember', 'is'), 12)
|
||||||
self.assertEqual(month_by_name('December'), 12)
|
self.assertEqual(month_by_name('December'), 12)
|
||||||
self.assertEqual(month_by_name('décembre'), None)
|
self.assertEqual(month_by_name('décembre'), None)
|
||||||
self.assertEqual(month_by_name('Unknown', 'unknown'), None)
|
self.assertEqual(month_by_name('Unknown', 'unknown'), None)
|
||||||
|
|||||||
@@ -311,8 +311,10 @@ from .canalsurmas import CanalsurmasIE
|
|||||||
from .caracoltv import CaracolTvPlayIE
|
from .caracoltv import CaracolTvPlayIE
|
||||||
from .cbc import (
|
from .cbc import (
|
||||||
CBCIE,
|
CBCIE,
|
||||||
|
CBCGemContentIE,
|
||||||
CBCGemIE,
|
CBCGemIE,
|
||||||
CBCGemLiveIE,
|
CBCGemLiveIE,
|
||||||
|
CBCGemOlympicsIE,
|
||||||
CBCGemPlaylistIE,
|
CBCGemPlaylistIE,
|
||||||
CBCListenIE,
|
CBCListenIE,
|
||||||
CBCPlayerIE,
|
CBCPlayerIE,
|
||||||
@@ -1029,6 +1031,10 @@ from .livestream import (
|
|||||||
)
|
)
|
||||||
from .livestreamfails import LivestreamfailsIE
|
from .livestreamfails import LivestreamfailsIE
|
||||||
from .lnk import LnkIE
|
from .lnk import LnkIE
|
||||||
|
from .locipo import (
|
||||||
|
LocipoIE,
|
||||||
|
LocipoPlaylistIE,
|
||||||
|
)
|
||||||
from .loco import LocoIE
|
from .loco import LocoIE
|
||||||
from .loom import (
|
from .loom import (
|
||||||
LoomFolderIE,
|
LoomFolderIE,
|
||||||
@@ -2343,6 +2349,7 @@ from .vimm import (
|
|||||||
)
|
)
|
||||||
from .viously import ViouslyIE
|
from .viously import ViouslyIE
|
||||||
from .viqeo import ViqeoIE
|
from .viqeo import ViqeoIE
|
||||||
|
from .visir import VisirIE
|
||||||
from .viu import (
|
from .viu import (
|
||||||
ViuIE,
|
ViuIE,
|
||||||
ViuOTTIE,
|
ViuOTTIE,
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
join_nonempty,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
jwt_decode_hs256,
|
jwt_decode_hs256,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
@@ -25,6 +26,7 @@ from ..utils import (
|
|||||||
url_basename,
|
url_basename,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
from ..utils.traversal import require, traverse_obj, trim_str
|
from ..utils.traversal import require, traverse_obj, trim_str
|
||||||
|
|
||||||
@@ -540,6 +542,32 @@ class CBCGemBaseIE(InfoExtractor):
|
|||||||
f'https://services.radio-canada.ca/ott/catalog/v2/gem/show/{item_id}',
|
f'https://services.radio-canada.ca/ott/catalog/v2/gem/show/{item_id}',
|
||||||
display_id or item_id, query={'device': 'web'})
|
display_id or item_id, query={'device': 'web'})
|
||||||
|
|
||||||
|
def _call_media_api(self, media_id, app_code='gem', display_id=None, headers=None):
    """Query the Radio-Canada media validation endpoint and return its JSON.

    media_id is sent as ``idMedia`` and app_code as ``appCode``.
    display_id, when given, is used instead of media_id as the ID for
    the download call. headers is forwarded unchanged.

    The response's ``errorCode`` decides the outcome: 0 returns the
    response, 1 is reported as a geo restriction, 35 as a login
    requirement, and anything else (including a missing or non-int
    code) raises ExtractorError with the code and server message.
    """
    validation_query = {
        'appCode': app_code,
        'connectionType': 'hd',
        'deviceType': 'ipad',
        'multibitrate': 'true',
        'output': 'json',
        'tech': 'hls',
        'manifestVersion': '2',
        'manifestType': 'desktop',
        'idMedia': media_id,
    }
    response = self._download_json(
        'https://services.radio-canada.ca/media/validation/v2/',
        display_id or media_id, headers=headers, query=validation_query)

    status = traverse_obj(response, ('errorCode', {int}))
    if status == 0:
        return response

    if status == 1:
        self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
    if status == 35:
        self.raise_login_required(method='password')

    # Any other status is surfaced together with whatever message the API supplied
    details = join_nonempty(status, response.get('message'), delim=' - ')
    raise ExtractorError(f'{self.IE_NAME} said: {details}')
|
||||||
|
|
||||||
def _extract_item_info(self, item_info):
|
def _extract_item_info(self, item_info):
|
||||||
episode_number = None
|
episode_number = None
|
||||||
title = traverse_obj(item_info, ('title', {str}))
|
title = traverse_obj(item_info, ('title', {str}))
|
||||||
@@ -567,7 +595,7 @@ class CBCGemBaseIE(InfoExtractor):
|
|||||||
|
|
||||||
class CBCGemIE(CBCGemBaseIE):
|
class CBCGemIE(CBCGemBaseIE):
|
||||||
IE_NAME = 'gem.cbc.ca'
|
IE_NAME = 'gem.cbc.ca'
|
||||||
_VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>[0-9a-z-]+/s(?P<season>[0-9]+)[a-z][0-9]+)'
|
_VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>[0-9a-z-]+/s(?P<season>[0-9]+)[a-z][0-9]{2,4})/?(?:[?#]|$)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# This is a normal, public, TV show video
|
# This is a normal, public, TV show video
|
||||||
'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01',
|
'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01',
|
||||||
@@ -709,29 +737,10 @@ class CBCGemIE(CBCGemBaseIE):
|
|||||||
if claims_token := self._fetch_claims_token():
|
if claims_token := self._fetch_claims_token():
|
||||||
headers['x-claims-token'] = claims_token
|
headers['x-claims-token'] = claims_token
|
||||||
|
|
||||||
m3u8_info = self._download_json(
|
m3u8_url = self._call_media_api(
|
||||||
'https://services.radio-canada.ca/media/validation/v2/',
|
item_info['idMedia'], display_id=video_id, headers=headers)['url']
|
||||||
video_id, headers=headers, query={
|
|
||||||
'appCode': 'gem',
|
|
||||||
'connectionType': 'hd',
|
|
||||||
'deviceType': 'ipad',
|
|
||||||
'multibitrate': 'true',
|
|
||||||
'output': 'json',
|
|
||||||
'tech': 'hls',
|
|
||||||
'manifestVersion': '2',
|
|
||||||
'manifestType': 'desktop',
|
|
||||||
'idMedia': item_info['idMedia'],
|
|
||||||
})
|
|
||||||
|
|
||||||
if m3u8_info.get('errorCode') == 1:
|
|
||||||
self.raise_geo_restricted(countries=['CA'])
|
|
||||||
elif m3u8_info.get('errorCode') == 35:
|
|
||||||
self.raise_login_required(method='password')
|
|
||||||
elif m3u8_info.get('errorCode') != 0:
|
|
||||||
raise ExtractorError(f'{self.IE_NAME} said: {m3u8_info.get("errorCode")} - {m3u8_info.get("message")}')
|
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(
|
formats = self._extract_m3u8_formats(
|
||||||
m3u8_info['url'], video_id, 'mp4', m3u8_id='hls', query={'manifestType': ''})
|
m3u8_url, video_id, 'mp4', m3u8_id='hls', query={'manifestType': ''})
|
||||||
self._remove_duplicate_formats(formats)
|
self._remove_duplicate_formats(formats)
|
||||||
|
|
||||||
for fmt in formats:
|
for fmt in formats:
|
||||||
@@ -801,7 +810,128 @@ class CBCGemPlaylistIE(CBCGemBaseIE):
|
|||||||
}), series=traverse_obj(show_info, ('title', {str})))
|
}), series=traverse_obj(show_info, ('title', {str})))
|
||||||
|
|
||||||
|
|
||||||
class CBCGemLiveIE(InfoExtractor):
|
class CBCGemContentIE(CBCGemBaseIE):
|
||||||
|
IE_NAME = 'gem.cbc.ca:content'
|
||||||
|
IE_DESC = False # Do not list
|
||||||
|
_VALID_URL = r'https?://gem\.cbc\.ca/(?P<id>[0-9a-z-]+)/?(?:[?#]|$)'
|
||||||
|
_TESTS = [{
|
||||||
|
# Series URL; content_type == 'Season'
|
||||||
|
'url': 'https://gem.cbc.ca/the-tunnel',
|
||||||
|
'playlist_count': 3,
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'the-tunnel',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# Miniseries URL; content_type == 'Parts'
|
||||||
|
'url': 'https://gem.cbc.ca/summit-72',
|
||||||
|
'playlist_count': 1,
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'summit-72',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# Olympics URL; content_type == 'Standalone'
|
||||||
|
'url': 'https://gem.cbc.ca/ski-jumping-nh-individual-womens-final-30086',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'ski-jumping-nh-individual-womens-final-30086',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Ski Jumping: NH Individual (Women\'s) - Final',
|
||||||
|
'description': 'md5:411c07c8a9a4a36344530b0c726bf8ab',
|
||||||
|
'duration': 12793,
|
||||||
|
'thumbnail': r're:https://[^.]+\.cbc\.ca/.+\.jpg',
|
||||||
|
'release_timestamp': 1770482100,
|
||||||
|
'release_date': '20260207',
|
||||||
|
'live_status': 'was_live',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# Movie URL; content_type == 'Standalone'; requires authentication
|
||||||
|
'url': 'https://gem.cbc.ca/copa-71',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
    """Route a bare ``gem.cbc.ca/<slug>`` URL according to its content type.

    The page's Next.js data is read for ``contentType``:

    * ``Standalone``: delegate to CBCGemOlympicsIE when the page's
      call-to-action media URL matches that extractor, otherwise to
      CBCGemIE using a constructed ``<slug>/s01e01`` URL.
    * anything else (series/miniseries): return a playlist of every
      lineup URL that CBCGemPlaylistIE accepts.
    """
    display_id = self._match_id(url)
    webpage = self._download_webpage(url, display_id)
    data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['data']
    content_type = data['contentType']
    self.write_debug(f'Routing for content type "{content_type}"')

    if content_type == 'Standalone':
        new_url = traverse_obj(data, (
            'header', 'cta', 'media', 'url', {urljoin('https://gem.cbc.ca/')}))
        # new_url is None when the page has no CTA media URL; only hand a
        # real string to suitable() and otherwise use the fallback below
        if new_url and CBCGemOlympicsIE.suitable(new_url):
            return self.url_result(new_url, CBCGemOlympicsIE)

        # Manually construct non-Olympics standalone URLs to avoid returning trailer URLs
        return self.url_result(f'https://gem.cbc.ca/{display_id}/s01e01', CBCGemIE)

    # Handle series URLs (content_type == 'Season') and miniseries URLs (content_type == 'Parts')
    def entries():
        for playlist_url in traverse_obj(data, (
            'content', ..., 'lineups', ..., 'url', {urljoin('https://gem.cbc.ca/')},
            {lambda x: x if CBCGemPlaylistIE.suitable(x) else None},
        )):
            yield self.url_result(playlist_url, CBCGemPlaylistIE)

    return self.playlist_result(entries(), display_id)
|
||||||
|
|
||||||
|
|
||||||
|
class CBCGemOlympicsIE(CBCGemBaseIE):
|
||||||
|
IE_NAME = 'gem.cbc.ca:olympics'
|
||||||
|
_VALID_URL = r'https?://gem\.cbc\.ca/(?P<id>(?:[0-9a-z]+-)+[0-9]{5,})/s01e(?P<media_id>[0-9]{5,})'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://gem.cbc.ca/ski-jumping-nh-individual-womens-final-30086/s01e30086',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'ski-jumping-nh-individual-womens-final-30086',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Ski Jumping: NH Individual (Women\'s) - Final',
|
||||||
|
'description': 'md5:411c07c8a9a4a36344530b0c726bf8ab',
|
||||||
|
'duration': 12793,
|
||||||
|
'thumbnail': r're:https://[^.]+\.cbc\.ca/.+\.jpg',
|
||||||
|
'release_timestamp': 1770482100,
|
||||||
|
'release_date': '20260207',
|
||||||
|
'live_status': 'was_live',
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
    """Extract a Gem Olympics item (live event or replay).

    The show API response is searched for the item whose
    ``formattedIdMedia`` equals the media ID from the URL. A live event
    whose start time is still in the future is returned without formats
    and marked ``is_upcoming``; otherwise the HLS manifest is requested
    through the media API with the ``medianetlive`` app code.
    """
    mobj = self._match_valid_url(url)
    video_id = mobj.group('id')
    media_id = mobj.group('media_id')

    show_data = self._call_show_api(video_id)
    item = traverse_obj(show_data, (
        'content', ..., 'lineups', ..., 'items',
        lambda _, v: v['formattedIdMedia'] == media_id, any, {require('item info')}))

    status_by_type = {
        'LiveEvent': 'is_live',
        'Replay': 'was_live',
    }
    live_status = status_by_type.get(item.get('type'))

    # Live start date is tried first, then the replay air date
    release_timestamp = traverse_obj(item, (
        'metadata', (('live', 'startDate'), ('replay', 'airDate')), {parse_iso8601}, any))

    not_started_yet = bool(
        live_status == 'is_live' and release_timestamp and release_timestamp > time.time())

    if not not_started_yet:
        m3u8_url = self._call_media_api(media_id, 'medianetlive', video_id)['url']
        formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', live=live_status == 'is_live')
    else:
        formats = []
        live_status = 'is_upcoming'
        self.raise_no_formats('This livestream has not yet started', expected=True)

    metadata = traverse_obj(item, {
        'title': ('title', {str}),
        'description': ('description', {str}),
        'thumbnail': ('images', 'card', 'url', {url_or_none}),
        'duration': ('metadata', 'replay', 'duration', {int_or_none}),
    })

    return {
        'id': video_id,
        'formats': formats,
        'live_status': live_status,
        'release_timestamp': release_timestamp,
        **metadata,
    }
|
||||||
|
|
||||||
|
|
||||||
|
class CBCGemLiveIE(CBCGemBaseIE):
|
||||||
IE_NAME = 'gem.cbc.ca:live'
|
IE_NAME = 'gem.cbc.ca:live'
|
||||||
_VALID_URL = r'https?://gem\.cbc\.ca/live(?:-event)?/(?P<id>\d+)'
|
_VALID_URL = r'https?://gem\.cbc\.ca/live(?:-event)?/(?P<id>\d+)'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
@@ -871,7 +1001,6 @@ class CBCGemLiveIE(InfoExtractor):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
_GEO_COUNTRIES = ['CA']
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
@@ -900,19 +1029,8 @@ class CBCGemLiveIE(InfoExtractor):
|
|||||||
live_status = 'is_upcoming'
|
live_status = 'is_upcoming'
|
||||||
self.raise_no_formats('This livestream has not yet started', expected=True)
|
self.raise_no_formats('This livestream has not yet started', expected=True)
|
||||||
else:
|
else:
|
||||||
stream_data = self._download_json(
|
m3u8_url = self._call_media_api(video_stream_id, 'medianetlive', video_id)['url']
|
||||||
'https://services.radio-canada.ca/media/validation/v2/', video_id, query={
|
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', live=live_status == 'is_live')
|
||||||
'appCode': 'medianetlive',
|
|
||||||
'connectionType': 'hd',
|
|
||||||
'deviceType': 'ipad',
|
|
||||||
'idMedia': video_stream_id,
|
|
||||||
'multibitrate': 'true',
|
|
||||||
'output': 'json',
|
|
||||||
'tech': 'hls',
|
|
||||||
'manifestType': 'desktop',
|
|
||||||
})
|
|
||||||
formats = self._extract_m3u8_formats(
|
|
||||||
stream_data['url'], video_id, 'mp4', live=live_status == 'is_live')
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
|||||||
@@ -3,10 +3,12 @@ import urllib.parse
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
join_nonempty,
|
join_nonempty,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
parse_qs,
|
parse_qs,
|
||||||
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
)
|
)
|
||||||
@@ -107,6 +109,11 @@ class FirstTVIE(InfoExtractor):
|
|||||||
'timestamp': ('dvr_begin_at', {int_or_none}),
|
'timestamp': ('dvr_begin_at', {int_or_none}),
|
||||||
'upload_date': ('date_air', {unified_strdate}),
|
'upload_date': ('date_air', {unified_strdate}),
|
||||||
'duration': ('duration', {int_or_none}),
|
'duration': ('duration', {int_or_none}),
|
||||||
|
'chapters': ('episodes', lambda _, v: float_or_none(v['from']) is not None, {
|
||||||
|
'start_time': ('from', {float_or_none}),
|
||||||
|
'title': ('name', {str}, {unescapeHTML}),
|
||||||
|
'end_time': ('to', {float_or_none}),
|
||||||
|
}),
|
||||||
}),
|
}),
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
|||||||
209
yt_dlp/extractor/locipo.py
Normal file
209
yt_dlp/extractor/locipo.py
Normal file
@@ -0,0 +1,209 @@
|
|||||||
|
import functools
|
||||||
|
import math
|
||||||
|
|
||||||
|
from .streaks import StreaksBaseIE
|
||||||
|
from ..networking import HEADRequest
|
||||||
|
from ..utils import (
|
||||||
|
InAdvancePagedList,
|
||||||
|
clean_html,
|
||||||
|
js_to_json,
|
||||||
|
parse_iso8601,
|
||||||
|
parse_qs,
|
||||||
|
str_or_none,
|
||||||
|
)
|
||||||
|
from ..utils.traversal import require, traverse_obj
|
||||||
|
|
||||||
|
|
||||||
|
class LocipoBaseIE(StreaksBaseIE):
|
||||||
|
_API_BASE = 'https://web-api.locipo.jp'
|
||||||
|
_BASE_URL = 'https://locipo.jp'
|
||||||
|
_UUID_RE = r'[\da-f]{8}(?:-[\da-f]{4}){3}-[\da-f]{12}'
|
||||||
|
|
||||||
|
def _call_api(self, path, item_id, note, fatal=True):
    """Download JSON from ``<_API_BASE>/<path>``.

    note is a short label inserted into the progress and error
    messages; fatal is forwarded to the download call.
    """
    endpoint = f'{self._API_BASE}/{path}'
    progress_note = f'Downloading {note} API JSON'
    error_note = f'Unable to download {note} API JSON'
    return self._download_json(
        endpoint, item_id, note=progress_note, errnote=error_note, fatal=fatal)
|
||||||
|
|
||||||
|
|
||||||
|
class LocipoIE(LocipoBaseIE):
|
||||||
|
_VALID_URL = [
|
||||||
|
fr'https?://locipo\.jp/creative/(?P<id>{LocipoBaseIE._UUID_RE})',
|
||||||
|
fr'https?://locipo\.jp/embed/?\?(?:[^#]+&)?id=(?P<id>{LocipoBaseIE._UUID_RE})',
|
||||||
|
]
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://locipo.jp/creative/fb5ffeaa-398d-45ce-bb49-0e221b5f94f1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'fb5ffeaa-398d-45ce-bb49-0e221b5f94f1',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'リアルカレカノ#4 ~伊達さゆりと勉強しよっ?~',
|
||||||
|
'description': 'md5:70a40c202f3fb7946b61e55fa015094c',
|
||||||
|
'display_id': '5a2947fe596441f5bab88a61b0432d0d',
|
||||||
|
'live_status': 'not_live',
|
||||||
|
'modified_date': r're:\d{8}',
|
||||||
|
'modified_timestamp': int,
|
||||||
|
'release_timestamp': 1711789200,
|
||||||
|
'release_date': '20240330',
|
||||||
|
'series': 'リアルカレカノ',
|
||||||
|
'series_id': '1142',
|
||||||
|
'tags': 'count:4',
|
||||||
|
'thumbnail': r're:https?://.+\.(?:jpg|png)',
|
||||||
|
'timestamp': 1756984919,
|
||||||
|
'upload_date': '20250904',
|
||||||
|
'uploader': '東海テレビ',
|
||||||
|
'uploader_id': 'locipo-prod',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://locipo.jp/embed/?id=71a334a0-2b25-406f-9d96-88f341f571c2',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '71a334a0-2b25-406f-9d96-88f341f571c2',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '#1 オーディション/ゲスト伊藤美来、豊田萌絵',
|
||||||
|
'description': 'md5:5bbcf532474700439cf56ceb6a15630e',
|
||||||
|
'display_id': '0ab32634b884499a84adb25de844c551',
|
||||||
|
'live_status': 'not_live',
|
||||||
|
'modified_date': r're:\d{8}',
|
||||||
|
'modified_timestamp': int,
|
||||||
|
'release_timestamp': 1751623200,
|
||||||
|
'release_date': '20250704',
|
||||||
|
'series': '声優ラジオのウラカブリ~Locipo出張所~',
|
||||||
|
'series_id': '1454',
|
||||||
|
'tags': 'count:6',
|
||||||
|
'thumbnail': r're:https?://.+\.(?:jpg|png)',
|
||||||
|
'timestamp': 1757002966,
|
||||||
|
'upload_date': '20250904',
|
||||||
|
'uploader': 'テレビ愛知',
|
||||||
|
'uploader_id': 'locipo-prod',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://locipo.jp/creative/bff9950d-229b-4fe9-911a-7fa71a232f35?list=69a5b15c-901f-4828-a336-30c0de7612d3',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '69a5b15c-901f-4828-a336-30c0de7612d3',
|
||||||
|
'title': '見て・乗って・語りたい。 東海の鉄道沼',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 3,
|
||||||
|
}, {
|
||||||
|
'url': 'https://locipo.jp/creative/a0751a7f-c7dd-4a10-a7f1-e12720bdf16c?list=006cff3f-ba74-42f0-b4fd-241486ebda2b',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'a0751a7f-c7dd-4a10-a7f1-e12720bdf16c',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '#839 人間真空パック',
|
||||||
|
'description': 'md5:9fe190333b6975c5001c8c9cbe20d276',
|
||||||
|
'display_id': 'c2b4c9f4a6d648bd8e3c320e384b9d56',
|
||||||
|
'live_status': 'not_live',
|
||||||
|
'modified_date': r're:\d{8}',
|
||||||
|
'modified_timestamp': int,
|
||||||
|
'release_timestamp': 1746239400,
|
||||||
|
'release_date': '20250503',
|
||||||
|
'series': 'でんじろう先生のはぴエネ!',
|
||||||
|
'series_id': '202',
|
||||||
|
'tags': 'count:3',
|
||||||
|
'thumbnail': r're:https?://.+\.(?:jpg|png)',
|
||||||
|
'timestamp': 1756975909,
|
||||||
|
'upload_date': '20250904',
|
||||||
|
'uploader': '中京テレビ',
|
||||||
|
'uploader_id': 'locipo-prod',
|
||||||
|
},
|
||||||
|
'params': {'noplaylist': True},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
    """Extract a single Locipo creative, or defer to its playlist.

    When the URL carries a ``list`` query parameter and playlist
    extraction is wanted, the playlist URL is returned instead.
    Otherwise the creative's API record supplies the Streaks media ID
    and descriptive metadata, and the page's Nuxt config supplies the
    Streaks playback API key.
    """
    video_id = self._match_id(url)

    list_id = traverse_obj(parse_qs(url), ('list', -1, {str}))
    if self._yes_playlist(list_id, video_id):
        playlist_url = f'{self._BASE_URL}/playlist/{list_id}'
        return self.url_result(playlist_url, LocipoPlaylistIE)

    creative = self._call_api(f'creatives/{video_id}', video_id, 'Creatives')
    media_id = traverse_obj(creative, ('media_id', {str}, {require('Streaks media ID')}))

    webpage = self._download_webpage(url, video_id)
    nuxt_config = self._search_json(
        r'window\.__NUXT__\.config\s*=', webpage, 'config', video_id, transform_source=js_to_json)
    api_key = traverse_obj(nuxt_config, ('public', 'streaksVodPlaybackApiKey', {str}, {require('api key')}))

    # Later updates override earlier ones: Streaks data, then creative
    # metadata, then series metadata, and finally the Locipo video ID
    info = dict(self._extract_from_streaks_api('locipo-prod', media_id, headers={
        'Origin': 'https://locipo.jp',
        'X-Streaks-Api-Key': api_key,
    }))
    info.update(traverse_obj(creative, {
        'title': ('name', {clean_html}),
        'description': ('description', {clean_html}, filter),
        'release_timestamp': ('publication_started_at', {parse_iso8601}),
        'tags': ('keyword', {clean_html}, {lambda x: x.split(',')}, ..., {str.strip}, filter),
        'uploader': ('company', 'name', {clean_html}, filter),
    }))
    info.update(traverse_obj(creative, ('series', {
        'series': ('name', {clean_html}, filter),
        'series_id': ('id', {str_or_none}),
    })))
    info['id'] = video_id

    return info
|
||||||
|
|
||||||
|
|
||||||
|
class LocipoPlaylistIE(LocipoBaseIE):
|
||||||
|
_VALID_URL = [
|
||||||
|
fr'https?://locipo\.jp/(?P<type>playlist)/(?P<id>{LocipoBaseIE._UUID_RE})',
|
||||||
|
r'https?://locipo\.jp/(?P<type>series)/(?P<id>\d+)',
|
||||||
|
]
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://locipo.jp/playlist/35d3dd2b-531d-4824-8575-b1c527d29538',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '35d3dd2b-531d-4824-8575-b1c527d29538',
|
||||||
|
'title': 'レシピ集',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 135,
|
||||||
|
}, {
|
||||||
|
# Redirects to https://locipo.jp/series/1363
|
||||||
|
'url': 'https://locipo.jp/playlist/fef7c4fb-741f-4d6a-a3a6-754f354302a2',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1363',
|
||||||
|
'title': 'CBCアナウンサー公式【みてちょてれび】',
|
||||||
|
'description': 'md5:50a1b23e63112d5c06c882835c8c1fb1',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 38,
|
||||||
|
}, {
|
||||||
|
'url': 'https://locipo.jp/series/503',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '503',
|
||||||
|
'title': 'FishingLover東海',
|
||||||
|
'description': '東海地区の釣り場でフィッシングの魅力を余すところなくご紹介!!',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 223,
|
||||||
|
}]
|
||||||
|
_PAGE_SIZE = 100
|
||||||
|
|
||||||
|
def _fetch_page(self, path, playlist_id, page):
    """Yield url_results for one page of a playlist's or series' creatives.

    page is zero-based; it is shown one-based in the progress note and
    converted to an ``offset`` of ``page * _PAGE_SIZE`` in the query.
    """
    page_query = {
        'premium': False,
        'live': False,
        'limit': self._PAGE_SIZE,
        'offset': page * self._PAGE_SIZE,
    }
    page_data = self._download_json(
        f'{self._API_BASE}/{path}/{playlist_id}/creatives',
        playlist_id, f'Downloading page {page + 1}', query=page_query)

    creative_ids = traverse_obj(page_data, ('items', ..., 'id', {str}))
    yield from (
        self.url_result(f'{self._BASE_URL}/creative/{creative_id}', LocipoIE)
        for creative_id in creative_ids)
|
||||||
|
|
||||||
|
def _real_extract(self, url):
    """Extract a Locipo playlist or series as a paged playlist.

    A HEAD request is made first (non-fatally) because a playlist URL
    may redirect to a series URL; when the final URL still matches this
    extractor, its type and ID replace the ones parsed from the input.
    The first API response provides the item total used to size the
    paged list, and the title/description of the playlist or series.
    """
    playlist_type, playlist_id = self._match_valid_url(url).group('type', 'id')
    if urlh := self._request_webpage(HEADRequest(url), playlist_id, fatal=False):
        # Only adopt the redirect target if it is itself a playlist/series URL;
        # an unrelated target would otherwise fail on `.group` of a None match
        if redirected := self._match_valid_url(urlh.url):
            playlist_type, playlist_id = redirected.group('type', 'id')

    path = 'playlists' if playlist_type == 'playlist' else 'series'
    creatives = self._call_api(
        f'{path}/{playlist_id}/creatives', playlist_id, path.capitalize())

    entries = InAdvancePagedList(
        functools.partial(self._fetch_page, path, playlist_id),
        math.ceil(int(creatives['total']) / self._PAGE_SIZE), self._PAGE_SIZE)

    # `any` gives None when no item carries playlist/series metadata
    # (e.g. an empty listing); fall back to an empty mapping so the
    # keyword expansion below stays valid
    playlist_meta = traverse_obj(creatives, ('items', ..., playlist_type, {
        'title': ('name', {clean_html}, filter),
        'description': ('description', {clean_html}, filter),
    }, any)) or {}

    return self.playlist_result(entries, playlist_id, **playlist_meta)
|
||||||
@@ -506,6 +506,7 @@ class PornHubIE(PornHubBaseIE):
|
|||||||
'cast': ({find_elements(attr='data-label', value='pornstar')}, ..., {clean_html}),
|
'cast': ({find_elements(attr='data-label', value='pornstar')}, ..., {clean_html}),
|
||||||
}),
|
}),
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
|
'http_headers': {'Referer': f'https://www.{host}/'},
|
||||||
}, info)
|
}, info)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
116
yt_dlp/extractor/visir.py
Normal file
116
yt_dlp/extractor/visir.py
Normal file
@@ -0,0 +1,116 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
UnsupportedError,
|
||||||
|
clean_html,
|
||||||
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
|
month_by_name,
|
||||||
|
url_or_none,
|
||||||
|
urljoin,
|
||||||
|
)
|
||||||
|
from ..utils.traversal import find_element, traverse_obj
|
||||||
|
|
||||||
|
|
||||||
|
class VisirIE(InfoExtractor):
|
||||||
|
IE_DESC = 'Vísir'
|
||||||
|
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?visir\.is/(?P<type>k|player)/(?P<id>[\da-f-]+)(?:/(?P<slug>[\w.-]+))?'
|
||||||
|
_EMBED_REGEX = [rf'<iframe[^>]+src=["\'](?P<url>{_VALID_URL})']
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.visir.is/k/eabb8f7f-ad87-46fb-9469-a0f1dc0fc4bc-1769022963988',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'eabb8f7f-ad87-46fb-9469-a0f1dc0fc4bc-1769022963988',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Sveppi og Siggi Þór mestu skaphundarnir',
|
||||||
|
'categories': ['island-i-dag'],
|
||||||
|
'description': 'md5:e06bd6a0cd8bdde328ad8cf00d3d4df6',
|
||||||
|
'duration': 792,
|
||||||
|
'thumbnail': r're:https?://www\.visir\.is/.+',
|
||||||
|
'upload_date': '20260121',
|
||||||
|
'view_count': int,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.visir.is/k/b0a88e02-eceb-4270-855c-8328b76b9d81-1763979306704/tonlistarborgin-reykjavik',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'b0a88e02-eceb-4270-855c-8328b76b9d81-1763979306704',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Tónlistarborgin Reykjavík',
|
||||||
|
'categories': ['tonlist'],
|
||||||
|
'description': 'md5:47237589dc95dbde55dfbb163396f88a',
|
||||||
|
'display_id': 'tonlistarborgin-reykjavik',
|
||||||
|
'duration': 81,
|
||||||
|
'thumbnail': r're:https?://www\.visir\.is/.+',
|
||||||
|
'upload_date': '20251124',
|
||||||
|
'view_count': int,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.visir.is/player/0cd5709e-6870-46d0-aaaf-0ae637de94f1-1770060083580',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '0cd5709e-6870-46d0-aaaf-0ae637de94f1-1770060083580',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Sportpakkinn 2. febrúar 2026',
|
||||||
|
'categories': ['sportpakkinn'],
|
||||||
|
'display_id': 'sportpakkinn-2.-februar-2026',
|
||||||
|
'duration': 293,
|
||||||
|
'thumbnail': r're:https?://www\.visir\.is/.+',
|
||||||
|
'upload_date': '20260202',
|
||||||
|
'view_count': int,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
_WEBPAGE_TESTS = [{
|
||||||
|
'url': 'https://www.visir.is/g/20262837896d/segir-von-brigdin-med-prinsessuna-rista-djupt',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '9ad5e58a-f26f-49f7-8b1d-68f0629485b7-1770059257365',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Norðmenn tala ekki um annað en prinsessuna',
|
||||||
|
'categories': ['frettir'],
|
||||||
|
'description': 'md5:53e2623ae79e1355778c14f5b557a0cd',
|
||||||
|
'display_id': 'nordmenn-tala-ekki-um-annad-en-prinsessuna',
|
||||||
|
'duration': 138,
|
||||||
|
'thumbnail': r're:https?://www\.visir\.is/.+',
|
||||||
|
'upload_date': '20260202',
|
||||||
|
'view_count': int,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
    """Extract a Vísir video from a ``/k/`` page, resolving ``/player/`` URLs first.

    ``/player/`` pages are resolved through their OpenGraph URL and
    re-dispatched to this extractor; if that URL is not supported here,
    or is itself a ``/player/`` URL, UnsupportedError is raised.
    For ``/k/`` pages the upload date is parsed from the
    ``article-item__date`` element, and the stream URL and remaining
    metadata come from the ``App.Player.Init(...)`` JSON.
    """
    video_type, video_id, display_id = self._match_valid_url(url).group('type', 'id', 'slug')
    webpage = self._download_webpage(url, video_id)
    if video_type == 'player':
        real_url = self._og_search_url(webpage)
        # Refuse to loop back into another /player/ URL
        if not self.suitable(real_url) or self._match_valid_url(real_url).group('type') == 'player':
            raise UnsupportedError(real_url)
        return self.url_result(real_url, self.ie_key())

    upload_date = None
    date_elements = traverse_obj(webpage, (
        {find_element(cls='article-item__date')}, {clean_html}, filter, {str.split}))
    # Expected form is three tokens: "<day>." "<Icelandic month name>" "<year>"
    if date_elements and len(date_elements) == 3:
        day, month, year = date_elements
        day = int_or_none(day.rstrip('.'))
        month = month_by_name(month, 'is')
        if day and month and re.fullmatch(r'[0-9]{4}', year):
            upload_date = f'{year}{month:02d}{day:02d}'

    # _search_json takes (start_pattern, string, name, video_id): the
    # descriptive name comes before the video ID
    player = self._search_json(
        r'App\.Player\.Init\(', webpage, 'player', video_id, transform_source=js_to_json)
    m3u8_url = traverse_obj(player, ('File', {urljoin('https://vod.visir.is/')}))

    return {
        'id': video_id,
        'display_id': display_id,
        'formats': self._extract_m3u8_formats(m3u8_url, video_id, 'mp4'),
        'upload_date': upload_date,
        **traverse_obj(webpage, ({find_element(cls='article-item press-ads')}, {
            'description': ({find_element(cls='-large')}, {clean_html}, filter),
            'view_count': ({find_element(cls='article-item__viewcount')}, {clean_html}, {int_or_none}),
        })),
        **traverse_obj(player, {
            'title': ('Title', {clean_html}),
            'categories': ('Categoryname', {clean_html}, filter, all, filter),
            'duration': ('MediaDuration', {int_or_none}),
            'thumbnail': ('Image', {url_or_none}),
        }),
    }
|
||||||
@@ -213,16 +213,9 @@ class XHamsterIE(InfoExtractor):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _decipher_format_url(self, format_url, format_id):
|
_VALID_HEX_RE = r'[0-9a-fA-F]{12,}'
|
||||||
parsed_url = urllib.parse.urlparse(format_url)
|
|
||||||
|
|
||||||
hex_string, path_remainder = self._search_regex(
|
|
||||||
r'^/(?P<hex>[0-9a-fA-F]{12,})(?P<rem>[/,].+)$', parsed_url.path, 'url components',
|
|
||||||
default=(None, None), group=('hex', 'rem'))
|
|
||||||
if not hex_string:
|
|
||||||
self.report_warning(f'Skipping format "{format_id}": unsupported URL format')
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
def _decipher_hex_string(self, hex_string, format_id):
|
||||||
byte_data = bytes.fromhex(hex_string)
|
byte_data = bytes.fromhex(hex_string)
|
||||||
seed = int.from_bytes(byte_data[1:5], byteorder='little', signed=True)
|
seed = int.from_bytes(byte_data[1:5], byteorder='little', signed=True)
|
||||||
|
|
||||||
@@ -232,7 +225,33 @@ class XHamsterIE(InfoExtractor):
|
|||||||
self.report_warning(f'Skipping format "{format_id}": {e.msg}')
|
self.report_warning(f'Skipping format "{format_id}": {e.msg}')
|
||||||
return None
|
return None
|
||||||
|
|
||||||
deciphered = bytearray(byte ^ next(byte_gen) for byte in byte_data[5:]).decode('latin-1')
|
return bytearray(byte ^ next(byte_gen) for byte in byte_data[5:]).decode('latin-1')
|
||||||
|
|
||||||
|
def _decipher_format_url(self, format_url, format_id):
    """Return the deciphered form of format_url, or None if it must be skipped.

    format_url is either a bare hex ciphertext, which is deciphered
    directly, or a URL whose first path segment is hex ciphertext,
    which is deciphered and substituted back into the URL.
    """
    # Bare ciphertext that is long enough: decipher it as-is
    if re.fullmatch(self._VALID_HEX_RE, format_url):
        return self._decipher_hex_string(format_url, format_id)

    if not url_or_none(format_url):
        short_hex = re.fullmatch(r'[0-9a-fA-F]+', format_url)
        if short_hex:
            # Hex strings that are too short are expected, so we don't want to warn
            self.write_debug(f'Skipping dummy ciphertext for "{format_id}": {format_url}')
        else:
            # Something has likely changed on the site's end, so we need to warn
            self.report_warning(f'Skipping format "{format_id}": invalid ciphertext')
        return None

    url_parts = urllib.parse.urlparse(format_url)

    ciphertext, tail = self._search_regex(
        rf'^/(?P<hex>{self._VALID_HEX_RE})(?P<rem>[/,].+)$', url_parts.path, 'url components',
        default=(None, None), group=('hex', 'rem'))
    if not ciphertext:
        self.report_warning(f'Skipping format "{format_id}": unsupported URL format')
        return None

    plaintext = self._decipher_hex_string(ciphertext, format_id)
    if plaintext:
        return url_parts._replace(path=f'/{plaintext}{tail}').geturl()

    return None
|
||||||
|
|
||||||
|
|||||||
@@ -75,6 +75,9 @@ MONTH_NAMES = {
|
|||||||
'fr': [
|
'fr': [
|
||||||
'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
|
'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
|
||||||
'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
|
'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
|
||||||
|
'is': [
|
||||||
|
'janúar', 'febrúar', 'mars', 'apríl', 'maí', 'júní',
|
||||||
|
'júlí', 'ágúst', 'september', 'október', 'nóvember', 'desember'],
|
||||||
# these follow the genitive grammatical case (dopełniacz)
|
# these follow the genitive grammatical case (dopełniacz)
|
||||||
# some websites might be using nominative, which will require another month list
|
# some websites might be using nominative, which will require another month list
|
||||||
# https://en.wikibooks.org/wiki/Polish/Noun_cases
|
# https://en.wikibooks.org/wiki/Polish/Noun_cases
|
||||||
|
|||||||
Reference in New Issue
Block a user