mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-12-25 17:38:54 +00:00
Merge branch 'master' into threads
This commit is contained in:
@@ -2136,6 +2136,11 @@ class YoutubeDL:
|
||||
|
||||
def _check_formats(self, formats):
|
||||
for f in formats:
|
||||
working = f.get('__working')
|
||||
if working is not None:
|
||||
if working:
|
||||
yield f
|
||||
continue
|
||||
self.to_screen('[info] Testing format %s' % f['format_id'])
|
||||
path = self.get_output_path('temp')
|
||||
if not self._ensure_dir_exists(f'{path}/'):
|
||||
@@ -2152,33 +2157,44 @@ class YoutubeDL:
|
||||
os.remove(temp_file.name)
|
||||
except OSError:
|
||||
self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
|
||||
f['__working'] = success
|
||||
if success:
|
||||
yield f
|
||||
else:
|
||||
self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
|
||||
|
||||
def _select_formats(self, formats, selector):
|
||||
return list(selector({
|
||||
'formats': formats,
|
||||
'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
|
||||
'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats) # No formats with video
|
||||
or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
|
||||
}))
|
||||
|
||||
def _default_format_spec(self, info_dict, download=True):
|
||||
download = download and not self.params.get('simulate')
|
||||
prefer_best = download and (
|
||||
self.params['outtmpl']['default'] == '-'
|
||||
or info_dict.get('is_live') and not self.params.get('live_from_start'))
|
||||
|
||||
def can_merge():
|
||||
merger = FFmpegMergerPP(self)
|
||||
return merger.available and merger.can_merge()
|
||||
|
||||
prefer_best = (
|
||||
not self.params.get('simulate')
|
||||
and download
|
||||
and (
|
||||
not can_merge()
|
||||
or info_dict.get('is_live') and not self.params.get('live_from_start')
|
||||
or self.params['outtmpl']['default'] == '-'))
|
||||
compat = (
|
||||
prefer_best
|
||||
or self.params.get('allow_multiple_audio_streams', False)
|
||||
or 'format-spec' in self.params['compat_opts'])
|
||||
if not prefer_best and download and not can_merge():
|
||||
prefer_best = True
|
||||
formats = self._get_formats(info_dict)
|
||||
evaluate_formats = lambda spec: self._select_formats(formats, self.build_format_selector(spec))
|
||||
if evaluate_formats('b/bv+ba') != evaluate_formats('bv*+ba/b'):
|
||||
self.report_warning('ffmpeg not found. The downloaded format may not be the best available. '
|
||||
'Installing ffmpeg is strongly recommended: https://github.com/yt-dlp/yt-dlp#dependencies')
|
||||
|
||||
return (
|
||||
'best/bestvideo+bestaudio' if prefer_best
|
||||
else 'bestvideo*+bestaudio/best' if not compat
|
||||
else 'bestvideo+bestaudio/best')
|
||||
compat = (self.params.get('allow_multiple_audio_streams')
|
||||
or 'format-spec' in self.params['compat_opts'])
|
||||
|
||||
return ('best/bestvideo+bestaudio' if prefer_best
|
||||
else 'bestvideo+bestaudio/best' if compat
|
||||
else 'bestvideo*+bestaudio/best')
|
||||
|
||||
def build_format_selector(self, format_spec):
|
||||
def syntax_error(note, start):
|
||||
@@ -2928,12 +2944,7 @@ class YoutubeDL:
|
||||
self.write_debug(f'Default format spec: {req_format}')
|
||||
format_selector = self.build_format_selector(req_format)
|
||||
|
||||
formats_to_download = list(format_selector({
|
||||
'formats': formats,
|
||||
'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
|
||||
'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats) # No formats with video
|
||||
or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
|
||||
}))
|
||||
formats_to_download = self._select_formats(formats, format_selector)
|
||||
if interactive_format_selection and not formats_to_download:
|
||||
self.report_error('Requested format is not available', tb=False, is_error=False)
|
||||
continue
|
||||
@@ -3060,7 +3071,7 @@ class YoutubeDL:
|
||||
f = formats[-1]
|
||||
self.report_warning(
|
||||
'No subtitle format found matching "%s" for language %s, '
|
||||
'using %s' % (formats_query, lang, f['ext']))
|
||||
'using %s. Use --list-subs for a list of available subtitles' % (formats_query, lang, f['ext']))
|
||||
subs[lang] = f
|
||||
return subs
|
||||
|
||||
|
||||
@@ -46,7 +46,7 @@ from .utils import (
|
||||
from .utils._utils import _YDLLogger
|
||||
from .utils.networking import normalize_url
|
||||
|
||||
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
|
||||
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi', 'whale'}
|
||||
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
|
||||
|
||||
|
||||
@@ -219,6 +219,7 @@ def _get_chromium_based_browser_settings(browser_name):
|
||||
'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'),
|
||||
'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'),
|
||||
'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'),
|
||||
'whale': os.path.join(appdata_local, R'Naver\Naver Whale\User Data'),
|
||||
}[browser_name]
|
||||
|
||||
elif sys.platform == 'darwin':
|
||||
@@ -230,6 +231,7 @@ def _get_chromium_based_browser_settings(browser_name):
|
||||
'edge': os.path.join(appdata, 'Microsoft Edge'),
|
||||
'opera': os.path.join(appdata, 'com.operasoftware.Opera'),
|
||||
'vivaldi': os.path.join(appdata, 'Vivaldi'),
|
||||
'whale': os.path.join(appdata, 'Naver/Whale'),
|
||||
}[browser_name]
|
||||
|
||||
else:
|
||||
@@ -241,6 +243,7 @@ def _get_chromium_based_browser_settings(browser_name):
|
||||
'edge': os.path.join(config, 'microsoft-edge'),
|
||||
'opera': os.path.join(config, 'opera'),
|
||||
'vivaldi': os.path.join(config, 'vivaldi'),
|
||||
'whale': os.path.join(config, 'naver-whale'),
|
||||
}[browser_name]
|
||||
|
||||
# Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
|
||||
@@ -252,6 +255,7 @@ def _get_chromium_based_browser_settings(browser_name):
|
||||
'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium',
|
||||
'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium',
|
||||
'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome',
|
||||
'whale': 'Whale',
|
||||
}[browser_name]
|
||||
|
||||
browsers_without_profiles = {'opera'}
|
||||
@@ -347,6 +351,11 @@ def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, pa
|
||||
if value is None:
|
||||
return is_encrypted, None
|
||||
|
||||
# In chrome, session cookies have expires_utc set to 0
|
||||
# In our cookie-store, cookies that do not expire should have expires set to None
|
||||
if not expires_utc:
|
||||
expires_utc = None
|
||||
|
||||
return is_encrypted, http.cookiejar.Cookie(
|
||||
version=0, name=name, value=value, port=None, port_specified=False,
|
||||
domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -6,10 +6,10 @@ import time
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
js_to_json,
|
||||
dict_get,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
|
||||
@@ -12,20 +12,21 @@ import urllib.parse
|
||||
import urllib.request
|
||||
import urllib.response
|
||||
import uuid
|
||||
from ..utils.networking import clean_proxies
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_ecb_decrypt
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
bytes_to_intlist,
|
||||
decode_base_n,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
OnDemandPagedList,
|
||||
time_seconds,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
)
|
||||
from ..utils.networking import clean_proxies
|
||||
|
||||
|
||||
def add_opener(ydl, handler): # FIXME: Create proper API in .networking
|
||||
|
||||
@@ -3,10 +3,10 @@ from ..utils import (
|
||||
float_or_none,
|
||||
format_field,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
parse_codecs,
|
||||
parse_qs,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -10,18 +10,18 @@ from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
|
||||
from ..compat import compat_b64decode
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
ass_subtitles_timecode,
|
||||
bytes_to_intlist,
|
||||
bytes_to_long,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
long_to_bytes,
|
||||
parse_iso8601,
|
||||
pkcs1pad,
|
||||
strip_or_none,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
|
||||
@@ -4,11 +4,11 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ISO639Utils,
|
||||
OnDemandPagedList,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
ISO639Utils,
|
||||
join_nonempty,
|
||||
OnDemandPagedList,
|
||||
parse_duration,
|
||||
str_or_none,
|
||||
str_to_int,
|
||||
|
||||
@@ -5,7 +5,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
traverse_obj
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -12,7 +12,6 @@ from ..utils import (
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
_FIELDS = '''
|
||||
_id
|
||||
clipImageSource
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_filesize,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,17 +1,13 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
int_or_none,
|
||||
clean_html,
|
||||
ExtractorError
|
||||
)
|
||||
|
||||
|
||||
@@ -39,7 +35,7 @@ class AluraIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
course, video_id = self._match_valid_url(url)
|
||||
course, video_id = self._match_valid_url(url).group('course_name', 'id')
|
||||
video_url = self._VIDEO_URL % (course, video_id)
|
||||
|
||||
video_dict = self._download_json(video_url, video_id, 'Searching for videos')
|
||||
@@ -52,7 +48,7 @@ class AluraIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for video_obj in video_dict:
|
||||
video_url_m3u8 = video_obj.get('link')
|
||||
video_url_m3u8 = video_obj.get('mp4')
|
||||
video_format = self._extract_m3u8_formats(
|
||||
video_url_m3u8, None, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from .vimeo import VimeoIE
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
|
||||
@@ -5,7 +5,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
unified_timestamp
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import url_or_none, merge_dicts
|
||||
from ..utils import merge_dicts, url_or_none
|
||||
|
||||
|
||||
class AngelIE(InfoExtractor):
|
||||
|
||||
@@ -1,8 +1,5 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
str_to_int,
|
||||
ExtractorError
|
||||
)
|
||||
from ..utils import ExtractorError, str_to_int
|
||||
|
||||
|
||||
class AppleConnectIE(InfoExtractor):
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import re
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
|
||||
@@ -4,8 +4,8 @@ from ..compat import (
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
format_field,
|
||||
float_or_none,
|
||||
format_field,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
remove_start,
|
||||
|
||||
@@ -5,6 +5,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
GeoRestrictedError,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
strip_or_none,
|
||||
@@ -31,20 +32,6 @@ class ArteTVIE(ArteTVBaseIE):
|
||||
_TESTS = [{
|
||||
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/',
|
||||
'info_dict': {
|
||||
'id': '100103-000-A',
|
||||
'title': 'USA: Dyskryminacja na porodówce',
|
||||
'description': 'md5:242017b7cce59ffae340a54baefcafb1',
|
||||
'alt_title': 'ARTE Reportage',
|
||||
'upload_date': '20201103',
|
||||
'duration': 554,
|
||||
'thumbnail': r're:https://api-cdn\.arte\.tv/.+940x530',
|
||||
'timestamp': 1604417980,
|
||||
'ext': 'mp4',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'}
|
||||
}, {
|
||||
'note': 'No alt_title',
|
||||
'url': 'https://www.arte.tv/fr/videos/110371-000-A/la-chaleur-supplice-des-arbres-de-rue/',
|
||||
@@ -58,6 +45,23 @@ class ArteTVIE(ArteTVBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.arte.tv/de/videos/110203-006-A/zaz/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.arte.tv/fr/videos/109067-000-A/la-loi-de-teheran/',
|
||||
'info_dict': {
|
||||
'id': '109067-000-A',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:d2ca367b8ecee028dddaa8bd1aebc739',
|
||||
'timestamp': 1713927600,
|
||||
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/3rR6PLzfbigSkkeHtkCZNF/940x530',
|
||||
'duration': 7599,
|
||||
'title': 'La loi de Téhéran',
|
||||
'upload_date': '20240424',
|
||||
'subtitles': {
|
||||
'fr': 'mincount:1',
|
||||
'fr-acc': 'mincount:1',
|
||||
'fr-forced': 'mincount:1',
|
||||
},
|
||||
},
|
||||
}, {
|
||||
'note': 'age-restricted',
|
||||
'url': 'https://www.arte.tv/de/videos/006785-000-A/the-element-of-crime/',
|
||||
@@ -71,23 +75,7 @@ class ArteTVIE(ArteTVBaseIE):
|
||||
'upload_date': '20230930',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.arte.tv/de/videos/085374-003-A/im-hohen-norden-geboren/',
|
||||
'info_dict': {
|
||||
'id': '085374-003-A',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:ab79ec7cc472a93164415b4e4916abf9',
|
||||
'timestamp': 1702872000,
|
||||
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/TnyHBfPxv3v2GEY3suXGZP/940x530',
|
||||
'duration': 2594,
|
||||
'title': 'Die kurze Zeit der Jugend',
|
||||
'alt_title': 'Im hohen Norden geboren',
|
||||
'upload_date': '20231218',
|
||||
'subtitles': {
|
||||
'fr': 'mincount:1',
|
||||
'fr-acc': 'mincount:1',
|
||||
},
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}]
|
||||
|
||||
_GEO_BYPASS = True
|
||||
@@ -143,16 +131,18 @@ class ArteTVIE(ArteTVBaseIE):
|
||||
updated_subs = {}
|
||||
for lang, sub_formats in subs.items():
|
||||
for fmt in sub_formats:
|
||||
if fmt.get('url', '').endswith('-MAL.m3u8'):
|
||||
lang += '-acc'
|
||||
updated_subs.setdefault(lang, []).append(fmt)
|
||||
url = fmt.get('url') or ''
|
||||
suffix = ('acc' if url.endswith('-MAL.m3u8')
|
||||
else 'forced' if '_VO' not in url
|
||||
else None)
|
||||
updated_subs.setdefault(join_nonempty(lang, suffix), []).append(fmt)
|
||||
return updated_subs
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
video_id = mobj.group('id')
|
||||
lang = mobj.group('lang') or mobj.group('lang_2')
|
||||
langauge_code = self._LANG_MAP.get(lang)
|
||||
language_code = self._LANG_MAP.get(lang)
|
||||
|
||||
config = self._download_json(f'{self._API_BASE}/config/{lang}/{video_id}', video_id, headers={
|
||||
'x-validated-age': '18'
|
||||
@@ -180,10 +170,10 @@ class ArteTVIE(ArteTVBaseIE):
|
||||
m = self._VERSION_CODE_RE.match(stream_version_code)
|
||||
if m:
|
||||
lang_pref = int(''.join('01'[x] for x in (
|
||||
m.group('vlang') == langauge_code, # we prefer voice in the requested language
|
||||
m.group('vlang') == language_code, # we prefer voice in the requested language
|
||||
not m.group('audio_desc'), # and not the audio description version
|
||||
bool(m.group('original_voice')), # but if voice is not in the requested language, at least choose the original voice
|
||||
m.group('sub_lang') == langauge_code, # if subtitles are present, we prefer them in the requested language
|
||||
m.group('sub_lang') == language_code, # if subtitles are present, we prefer them in the requested language
|
||||
not m.group('has_sub'), # but we prefer no subtitles otherwise
|
||||
not m.group('sdh_sub'), # and we prefer not the hard-of-hearing subtitles if there are subtitles
|
||||
)))
|
||||
|
||||
@@ -2,10 +2,10 @@ import datetime as dt
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
jwt_encode_hs256,
|
||||
try_get,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -2,8 +2,8 @@ import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_str,
|
||||
compat_urllib_parse_urlencode,
|
||||
)
|
||||
from ..utils import (
|
||||
format_field,
|
||||
|
||||
@@ -2,12 +2,12 @@ import math
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
format_field,
|
||||
InAdvancePagedList,
|
||||
format_field,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
@@ -2,11 +2,11 @@ import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
try_get,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -602,7 +602,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
'url': 'http://www.bbc.com/news/world-europe-32668511',
|
||||
'info_dict': {
|
||||
'id': 'world-europe-32668511',
|
||||
'title': 'Russia stages massive WW2 parade',
|
||||
'title': 'Russia stages massive WW2 parade despite Western boycott',
|
||||
'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
@@ -623,6 +623,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
'info_dict': {
|
||||
'id': '3662a707-0af9-3149-963f-47bea720b460',
|
||||
'title': 'BUGGER',
|
||||
'description': r're:BUGGER The recent revelations by the whistleblower Edward Snowden were fascinating. .{211}\.{3}$',
|
||||
},
|
||||
'playlist_count': 18,
|
||||
}, {
|
||||
@@ -631,14 +632,14 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
'info_dict': {
|
||||
'id': 'p02mprgb',
|
||||
'ext': 'mp4',
|
||||
'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
|
||||
'description': 'md5:2868290467291b37feda7863f7a83f54',
|
||||
'title': 'Germanwings crash site aerial video',
|
||||
'description': r're:(?s)Aerial video showed the site where the Germanwings flight 4U 9525, .{156} BFM TV\.$',
|
||||
'duration': 47,
|
||||
'timestamp': 1427219242,
|
||||
'upload_date': '20150324',
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/news/1024/media/images/81879000/jpg/_81879090_81879089.jpg',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
@@ -656,21 +657,24 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'skip': 'now SIMORGH_DATA with no video',
|
||||
}, {
|
||||
# single video embedded with data-playable containing XML playlists (regional section)
|
||||
'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
|
||||
'info_dict': {
|
||||
'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
|
||||
'id': '39275083',
|
||||
'display_id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
|
||||
'ext': 'mp4',
|
||||
'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
|
||||
'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8',
|
||||
'description': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
|
||||
'timestamp': 1434713142,
|
||||
'upload_date': '20150619',
|
||||
'thumbnail': 'https://a.files.bbci.co.uk/worldservice/live/assets/images/2015/06/19/150619132146_honduras_hsopitales_militares_640x360_aptn_nocredit.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# single video from video playlist embedded with vxp-playlist-data JSON
|
||||
'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
|
||||
@@ -683,22 +687,21 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
# single video story with digitalData
|
||||
# single video story with __PWA_PRELOADED_STATE__
|
||||
'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
|
||||
'info_dict': {
|
||||
'id': 'p02q6gc4',
|
||||
'ext': 'flv',
|
||||
'title': 'Sri Lanka’s spicy secret',
|
||||
'description': 'As a new train line to Jaffna opens up the country’s north, travellers can experience a truly distinct slice of Tamil culture.',
|
||||
'timestamp': 1437674293,
|
||||
'upload_date': '20150723',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tasting the spice of life in Jaffna',
|
||||
'description': r're:(?s)BBC Travel Show’s Henry Golding explores the city of Jaffna .{151} aftertaste\.$',
|
||||
'timestamp': 1646058397,
|
||||
'upload_date': '20220228',
|
||||
'duration': 255,
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/1920xn/p02vxvkn.jpg',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# single video story without digitalData
|
||||
'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
|
||||
@@ -710,12 +713,10 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
'timestamp': 1415867444,
|
||||
'upload_date': '20141113',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
'skip': 'redirects to TopGear home page',
|
||||
}, {
|
||||
# single video embedded with Morph
|
||||
# TODO: replacement test page
|
||||
'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975',
|
||||
'info_dict': {
|
||||
'id': 'p041vhd0',
|
||||
@@ -726,27 +727,22 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
'uploader': 'BBC Sport',
|
||||
'uploader_id': 'bbc_sport',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Georestricted to UK',
|
||||
'skip': 'Video no longer in page',
|
||||
}, {
|
||||
# single video with playlist.sxml URL in playlist param
|
||||
# single video in __INITIAL_DATA__
|
||||
'url': 'http://www.bbc.com/sport/0/football/33653409',
|
||||
'info_dict': {
|
||||
'id': 'p02xycnp',
|
||||
'ext': 'mp4',
|
||||
'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
|
||||
'description': 'BBC Sport\'s David Ornstein has the latest transfer gossip, including rumours of a Manchester United return for Cristiano Ronaldo.',
|
||||
'title': 'Ronaldo to Man Utd, Arsenal to spend?',
|
||||
'description': r're:(?s)BBC Sport\'s David Ornstein rounds up the latest transfer reports, .{359} here\.$',
|
||||
'timestamp': 1437750175,
|
||||
'upload_date': '20150724',
|
||||
'thumbnail': r're:https?://.+/.+media/images/69320000/png/_69320754_mmgossipcolumnextraaugust18.png',
|
||||
'duration': 140,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# article with multiple videos embedded with playlist.sxml in playlist param
|
||||
# article with multiple videos embedded with Morph.setPayload
|
||||
'url': 'http://www.bbc.com/sport/0/football/34475836',
|
||||
'info_dict': {
|
||||
'id': '34475836',
|
||||
@@ -754,6 +750,21 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
# Testing noplaylist
|
||||
'url': 'http://www.bbc.com/sport/0/football/34475836',
|
||||
'info_dict': {
|
||||
'id': 'p034ppnv',
|
||||
'ext': 'mp4',
|
||||
'title': 'All you need to know about Jurgen Klopp',
|
||||
'timestamp': 1444335081,
|
||||
'upload_date': '20151008',
|
||||
'duration': 122.0,
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/onesport/cps/976/cpsprodpb/7542/production/_85981003_klopp.jpg',
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': True,
|
||||
},
|
||||
}, {
|
||||
# school report article with single video
|
||||
'url': 'http://www.bbc.co.uk/schoolreport/35744779',
|
||||
@@ -762,6 +773,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
'title': 'School which breaks down barriers in Jerusalem',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
'skip': 'redirects to Young Reporter home page https://www.bbc.co.uk/news/topics/cg41ylwv43pt',
|
||||
}, {
|
||||
# single video with playlist URL from weather section
|
||||
'url': 'http://www.bbc.com/weather/features/33601775',
|
||||
@@ -778,18 +790,33 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'timestamp': 1437785037,
|
||||
'upload_date': '20150725',
|
||||
'duration': 105,
|
||||
},
|
||||
}, {
|
||||
# video with window.__INITIAL_DATA__ and value as JSON string
|
||||
'url': 'https://www.bbc.com/news/av/world-europe-59468682',
|
||||
'info_dict': {
|
||||
'id': 'p0b71qth',
|
||||
'id': 'p0b779gc',
|
||||
'ext': 'mp4',
|
||||
'title': 'Why France is making this woman a national hero',
|
||||
'description': 'md5:7affdfab80e9c3a1f976230a1ff4d5e4',
|
||||
'description': r're:(?s)France is honouring the US-born 20th Century singer and activist Josephine .{208} Second World War.',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'timestamp': 1638230731,
|
||||
'upload_date': '20211130',
|
||||
'timestamp': 1638215626,
|
||||
'upload_date': '20211129',
|
||||
'duration': 125,
|
||||
},
|
||||
}, {
|
||||
# video with script id __NEXT_DATA__ and value as JSON string
|
||||
'url': 'https://www.bbc.com/news/uk-68546268',
|
||||
'info_dict': {
|
||||
'id': 'p0hj0lq7',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nasser Hospital doctor describes his treatment by IDF',
|
||||
'description': r're:(?s)Doctor Abu Sabha said he was detained by Israeli forces after .{276} hostages\."$',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'timestamp': 1710188248,
|
||||
'upload_date': '20240311',
|
||||
'duration': 104,
|
||||
},
|
||||
}, {
|
||||
# single video article embedded with data-media-vpid
|
||||
@@ -817,6 +844,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
'uploader': 'Radio 3',
|
||||
'uploader_id': 'bbc_radio_three',
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
|
||||
'info_dict': {
|
||||
@@ -824,6 +852,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:2fabf12a726603193a2879a055f72514',
|
||||
'description': 'Learn English words and phrases from this story',
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/1200x675/p06pq9gk.jpg',
|
||||
},
|
||||
'add_ie': [BBCCoUkIE.ie_key()],
|
||||
}, {
|
||||
@@ -832,28 +861,30 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
'info_dict': {
|
||||
'id': 'p07c6sb9',
|
||||
'ext': 'mp4',
|
||||
'title': 'How positive thinking is harming your happiness',
|
||||
'alt_title': 'The downsides of positive thinking',
|
||||
'description': 'md5:fad74b31da60d83b8265954ee42d85b4',
|
||||
'title': 'The downsides of positive thinking',
|
||||
'description': 'The downsides of positive thinking',
|
||||
'duration': 235,
|
||||
'thumbnail': r're:https?://.+/p07c9dsr.jpg',
|
||||
'upload_date': '20190604',
|
||||
'categories': ['Psychology'],
|
||||
'thumbnail': r're:https?://.+/p07c9dsr\.(?:jpg|webp|png)',
|
||||
'upload_date': '20220223',
|
||||
'timestamp': 1645632746,
|
||||
},
|
||||
}, {
|
||||
# BBC Sounds
|
||||
'url': 'https://www.bbc.co.uk/sounds/play/m001q78b',
|
||||
'url': 'https://www.bbc.co.uk/sounds/play/w3ct5rgx',
|
||||
'info_dict': {
|
||||
'id': 'm001q789',
|
||||
'id': 'p0hrw4nr',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Night Tracks Mix - Music for the darkling hour',
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0c00hym.jpg',
|
||||
'chapters': 'count:8',
|
||||
'description': 'md5:815fb51cbdaa270040aab8145b3f1d67',
|
||||
'uploader': 'Radio 3',
|
||||
'duration': 1800,
|
||||
'uploader_id': 'bbc_radio_three',
|
||||
},
|
||||
'title': 'Are our coastlines being washed away?',
|
||||
'description': r're:(?s)Around the world, coastlines are constantly changing .{2000,} Images\)$',
|
||||
'timestamp': 1713556800,
|
||||
'upload_date': '20240419',
|
||||
'duration': 1588,
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0hrnxbl.jpg',
|
||||
'uploader': 'World Service',
|
||||
'uploader_id': 'bbc_world_service',
|
||||
'series': 'CrowdScience',
|
||||
'chapters': [],
|
||||
}
|
||||
}, { # onion routes
|
||||
'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576',
|
||||
'only_matching': True,
|
||||
@@ -1008,8 +1039,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
webpage, 'group id', default=None)
|
||||
if group_id:
|
||||
return self.url_result(
|
||||
'https://www.bbc.co.uk/programmes/%s' % group_id,
|
||||
ie=BBCCoUkIE.ie_key())
|
||||
f'https://www.bbc.co.uk/programmes/{group_id}', BBCCoUkIE)
|
||||
|
||||
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
|
||||
programme_id = self._search_regex(
|
||||
@@ -1069,83 +1099,133 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
}
|
||||
|
||||
# Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
|
||||
# There are several setPayload calls may be present but the video
|
||||
# seems to be always related to the first one
|
||||
morph_payload = self._parse_json(
|
||||
self._search_regex(
|
||||
r'Morph\.setPayload\([^,]+,\s*({.+?})\);',
|
||||
webpage, 'morph payload', default='{}'),
|
||||
playlist_id, fatal=False)
|
||||
# Several setPayload calls may be present but the video(s)
|
||||
# should be in one that mentions leadMedia or videoData
|
||||
morph_payload = self._search_json(
|
||||
r'\bMorph\s*\.\s*setPayload\s*\([^,]+,', webpage, 'morph payload', playlist_id,
|
||||
contains_pattern=r'{(?s:(?:(?!</script>).)+(?:"leadMedia"|\\"videoData\\")\s*:.+)}',
|
||||
default={})
|
||||
if morph_payload:
|
||||
components = try_get(morph_payload, lambda x: x['body']['components'], list) or []
|
||||
for component in components:
|
||||
if not isinstance(component, dict):
|
||||
continue
|
||||
lead_media = try_get(component, lambda x: x['props']['leadMedia'], dict)
|
||||
if not lead_media:
|
||||
continue
|
||||
identifiers = lead_media.get('identifiers')
|
||||
if not identifiers or not isinstance(identifiers, dict):
|
||||
continue
|
||||
programme_id = identifiers.get('vpid') or identifiers.get('playablePid')
|
||||
for lead_media in traverse_obj(morph_payload, (
|
||||
'body', 'components', ..., 'props', 'leadMedia', {dict})):
|
||||
programme_id = traverse_obj(lead_media, ('identifiers', ('vpid', 'playablePid'), {str}, any))
|
||||
if not programme_id:
|
||||
continue
|
||||
title = lead_media.get('title') or self._og_search_title(webpage)
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
description = lead_media.get('summary')
|
||||
uploader = lead_media.get('masterBrand')
|
||||
uploader_id = lead_media.get('mid')
|
||||
duration = None
|
||||
duration_d = lead_media.get('duration')
|
||||
if isinstance(duration_d, dict):
|
||||
duration = parse_duration(dict_get(
|
||||
duration_d, ('rawDuration', 'formattedDuration', 'spokenDuration')))
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'title': lead_media.get('title') or self._og_search_title(webpage),
|
||||
**traverse_obj(lead_media, {
|
||||
'description': ('summary', {str}),
|
||||
'duration': ('duration', ('rawDuration', 'formattedDuration', 'spokenDuration'), {parse_duration}),
|
||||
'uploader': ('masterBrand', {str}),
|
||||
'uploader_id': ('mid', {str}),
|
||||
}),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
body = self._parse_json(traverse_obj(morph_payload, (
|
||||
'body', 'content', 'article', 'body')), playlist_id, fatal=False)
|
||||
for video_data in traverse_obj(body, (lambda _, v: v['videoData']['pid'], 'videoData')):
|
||||
if video_data.get('vpid'):
|
||||
video_id = video_data['vpid']
|
||||
formats, subtitles = self._download_media_selector(video_id)
|
||||
entry = {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
else:
|
||||
video_id = video_data['pid']
|
||||
entry = self.url_result(
|
||||
f'https://www.bbc.co.uk/programmes/{video_id}', BBCCoUkIE,
|
||||
video_id, url_transparent=True)
|
||||
entry.update({
|
||||
'timestamp': traverse_obj(morph_payload, (
|
||||
'body', 'content', 'article', 'dateTimeInfo', 'dateTime', {parse_iso8601})
|
||||
),
|
||||
**traverse_obj(video_data, {
|
||||
'thumbnail': (('iChefImage', 'image'), {url_or_none}, any),
|
||||
'title': (('title', 'caption'), {str}, any),
|
||||
'duration': ('duration', {parse_duration}),
|
||||
}),
|
||||
})
|
||||
if video_data.get('isLead') and not self._yes_playlist(playlist_id, video_id):
|
||||
return entry
|
||||
entries.append(entry)
|
||||
if entries:
|
||||
playlist_title = traverse_obj(morph_payload, (
|
||||
'body', 'content', 'article', 'headline', {str})) or playlist_title
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
preload_state = self._parse_json(self._search_regex(
|
||||
r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
|
||||
'preload state', default='{}'), playlist_id, fatal=False)
|
||||
if preload_state:
|
||||
current_programme = preload_state.get('programmes', {}).get('current') or {}
|
||||
programme_id = current_programme.get('id')
|
||||
if current_programme and programme_id and current_programme.get('type') == 'playable_item':
|
||||
title = current_programme.get('titles', {}).get('tertiary') or playlist_title
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
synopses = current_programme.get('synopses') or {}
|
||||
network = current_programme.get('network') or {}
|
||||
duration = int_or_none(
|
||||
current_programme.get('duration', {}).get('value'))
|
||||
thumbnail = None
|
||||
image_url = current_programme.get('image_url')
|
||||
if image_url:
|
||||
thumbnail = image_url.replace('{recipe}', 'raw')
|
||||
# various PRELOADED_STATE JSON
|
||||
preload_state = self._search_json(
|
||||
r'window\.__(?:PWA_)?PRELOADED_STATE__\s*=', webpage,
|
||||
'preload state', playlist_id, transform_source=js_to_json, default={})
|
||||
# PRELOADED_STATE with current programmme
|
||||
current_programme = traverse_obj(preload_state, ('programmes', 'current', {dict}))
|
||||
programme_id = traverse_obj(current_programme, ('id', {str}))
|
||||
if programme_id and current_programme.get('type') == 'playable_item':
|
||||
title = traverse_obj(current_programme, ('titles', ('tertiary', 'secondary'), {str}, any)) or playlist_title
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
**traverse_obj(current_programme, {
|
||||
'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
|
||||
'thumbnail': ('image_url', {lambda u: url_or_none(u.replace('{recipe}', 'raw'))}),
|
||||
'duration': ('duration', 'value', {int_or_none}),
|
||||
'uploader': ('network', 'short_title', {str}),
|
||||
'uploader_id': ('network', 'id', {str}),
|
||||
'timestamp': ((('availability', 'from'), ('release', 'date')), {parse_iso8601}, any),
|
||||
'series': ('titles', 'primary', {str}),
|
||||
}),
|
||||
'subtitles': subtitles,
|
||||
'chapters': traverse_obj(preload_state, (
|
||||
'tracklist', 'tracks', lambda _, v: float(v['offset']['start']), {
|
||||
'title': ('titles', {lambda x: join_nonempty(
|
||||
'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
|
||||
'start_time': ('offset', 'start', {float_or_none}),
|
||||
'end_time': ('offset', 'end', {float_or_none}),
|
||||
})
|
||||
),
|
||||
}
|
||||
|
||||
# PWA_PRELOADED_STATE with article video asset
|
||||
asset_id = traverse_obj(preload_state, (
|
||||
'entities', 'articles', lambda k, _: k.rsplit('/', 1)[-1] == playlist_id,
|
||||
'assetVideo', 0, {str}, any))
|
||||
if asset_id:
|
||||
video_id = traverse_obj(preload_state, ('entities', 'videos', asset_id, 'vpid', {str}))
|
||||
if video_id:
|
||||
article = traverse_obj(preload_state, (
|
||||
'entities', 'articles', lambda _, v: v['assetVideo'][0] == asset_id, any))
|
||||
|
||||
def image_url(image_id):
|
||||
return traverse_obj(preload_state, (
|
||||
'entities', 'images', image_id, 'url',
|
||||
{lambda u: url_or_none(u.replace('$recipe', 'raw'))}))
|
||||
|
||||
formats, subtitles = self._download_media_selector(video_id)
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'description': dict_get(synopses, ('long', 'medium', 'short')),
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'uploader': network.get('short_title'),
|
||||
'uploader_id': network.get('id'),
|
||||
'id': video_id,
|
||||
**traverse_obj(preload_state, ('entities', 'videos', asset_id, {
|
||||
'title': ('title', {str}),
|
||||
'description': (('synopsisLong', 'synopsisMedium', 'synopsisShort'), {str}, any),
|
||||
'thumbnail': (0, {image_url}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
})),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'chapters': traverse_obj(preload_state, (
|
||||
'tracklist', 'tracks', lambda _, v: float_or_none(v['offset']['start']), {
|
||||
'title': ('titles', {lambda x: join_nonempty(
|
||||
'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
|
||||
'start_time': ('offset', 'start', {float_or_none}),
|
||||
'end_time': ('offset', 'end', {float_or_none}),
|
||||
})) or None,
|
||||
'timestamp': traverse_obj(article, ('displayDate', {parse_iso8601})),
|
||||
}
|
||||
else:
|
||||
return self.url_result(
|
||||
f'https://www.bbc.co.uk/programmes/{asset_id}', BBCCoUkIE,
|
||||
asset_id, playlist_title, display_id=playlist_id,
|
||||
description=playlist_description)
|
||||
|
||||
bbc3_config = self._parse_json(
|
||||
self._search_regex(
|
||||
@@ -1191,6 +1271,28 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
def parse_model(model):
|
||||
"""Extract single video from model structure"""
|
||||
item_id = traverse_obj(model, ('versions', 0, 'versionId', {str}))
|
||||
if not item_id:
|
||||
return
|
||||
formats, subtitles = self._download_media_selector(item_id)
|
||||
return {
|
||||
'id': item_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(model, {
|
||||
'title': ('title', {str}),
|
||||
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
|
||||
'description': ('synopses', ('long', 'medium', 'short'), {str}, {lambda x: x or None}, any),
|
||||
'duration': ('versions', 0, 'duration', {int}),
|
||||
'timestamp': ('versions', 0, 'availableFrom', {functools.partial(int_or_none, scale=1000)}),
|
||||
})
|
||||
}
|
||||
|
||||
def is_type(*types):
|
||||
return lambda _, v: v['type'] in types
|
||||
|
||||
initial_data = self._search_regex(
|
||||
r'window\.__INITIAL_DATA__\s*=\s*("{.+?}")\s*;', webpage,
|
||||
'quoted preload state', default=None)
|
||||
@@ -1202,6 +1304,19 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
|
||||
initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
|
||||
if initial_data:
|
||||
for video_data in traverse_obj(initial_data, (
|
||||
'stores', 'article', 'articleBodyContent', is_type('video'))):
|
||||
model = traverse_obj(video_data, (
|
||||
'model', 'blocks', is_type('aresMedia'),
|
||||
'model', 'blocks', is_type('aresMediaMetadata'),
|
||||
'model', {dict}, any))
|
||||
entry = parse_model(model)
|
||||
if entry:
|
||||
entries.append(entry)
|
||||
if entries:
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
def parse_media(media):
|
||||
if not media:
|
||||
return
|
||||
@@ -1234,27 +1349,90 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
'subtitles': subtitles,
|
||||
'timestamp': item_time,
|
||||
'description': strip_or_none(item_desc),
|
||||
'duration': int_or_none(item.get('duration')),
|
||||
})
|
||||
for resp in (initial_data.get('data') or {}).values():
|
||||
name = resp.get('name')
|
||||
|
||||
for resp in traverse_obj(initial_data, ('data', lambda _, v: v['name'])):
|
||||
name = resp['name']
|
||||
if name == 'media-experience':
|
||||
parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
|
||||
elif name == 'article':
|
||||
for block in (try_get(resp,
|
||||
(lambda x: x['data']['blocks'],
|
||||
lambda x: x['data']['content']['model']['blocks'],),
|
||||
list) or []):
|
||||
if block.get('type') not in ['media', 'video']:
|
||||
continue
|
||||
parse_media(block.get('model'))
|
||||
for block in traverse_obj(resp, (
|
||||
'data', (None, ('content', 'model')), 'blocks',
|
||||
is_type('media', 'video'), 'model', {dict})):
|
||||
parse_media(block)
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
# extract from SIMORGH_DATA hydration JSON
|
||||
simorgh_data = self._search_json(
|
||||
r'window\s*\.\s*SIMORGH_DATA\s*=', webpage,
|
||||
'simorgh data', playlist_id, default={})
|
||||
if simorgh_data:
|
||||
done = False
|
||||
for video_data in traverse_obj(simorgh_data, (
|
||||
'pageData', 'content', 'model', 'blocks', is_type('video', 'legacyMedia'))):
|
||||
model = traverse_obj(video_data, (
|
||||
'model', 'blocks', is_type('aresMedia'),
|
||||
'model', 'blocks', is_type('aresMediaMetadata'),
|
||||
'model', {dict}, any))
|
||||
if video_data['type'] == 'video':
|
||||
entry = parse_model(model)
|
||||
else: # legacyMedia: no duration, subtitles
|
||||
block_id, entry = traverse_obj(model, ('blockId', {str})), None
|
||||
media_data = traverse_obj(simorgh_data, (
|
||||
'pageData', 'promo', 'media',
|
||||
{lambda x: x if x['id'] == block_id else None}))
|
||||
formats = traverse_obj(media_data, ('playlist', lambda _, v: url_or_none(v['url']), {
|
||||
'url': ('url', {url_or_none}),
|
||||
'ext': ('format', {str}),
|
||||
'tbr': ('bitrate', {functools.partial(int_or_none, scale=1000)}),
|
||||
}))
|
||||
if formats:
|
||||
entry = {
|
||||
'id': block_id,
|
||||
'display_id': playlist_id,
|
||||
'formats': formats,
|
||||
'description': traverse_obj(simorgh_data, ('pageData', 'promo', 'summary', {str})),
|
||||
**traverse_obj(model, {
|
||||
'title': ('title', {str}),
|
||||
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
|
||||
'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
|
||||
'timestamp': ('firstPublished', {functools.partial(int_or_none, scale=1000)}),
|
||||
}),
|
||||
}
|
||||
done = True
|
||||
if entry:
|
||||
entries.append(entry)
|
||||
if done:
|
||||
break
|
||||
if entries:
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
def extract_all(pattern):
|
||||
return list(filter(None, map(
|
||||
lambda s: self._parse_json(s, playlist_id, fatal=False),
|
||||
re.findall(pattern, webpage))))
|
||||
|
||||
# US accessed article with single embedded video (e.g.
|
||||
# https://www.bbc.com/news/uk-68546268)
|
||||
next_data = traverse_obj(self._search_nextjs_data(webpage, playlist_id, default={}),
|
||||
('props', 'pageProps', 'page'))
|
||||
model = traverse_obj(next_data, (
|
||||
..., 'contents', is_type('video'),
|
||||
'model', 'blocks', is_type('media'),
|
||||
'model', 'blocks', is_type('mediaMetadata'),
|
||||
'model', {dict}, any))
|
||||
if model and (entry := parse_model(model)):
|
||||
if not entry.get('timestamp'):
|
||||
entry['timestamp'] = traverse_obj(next_data, (
|
||||
..., 'contents', is_type('timestamp'), 'model',
|
||||
'timestamp', {functools.partial(int_or_none, scale=1000)}, any))
|
||||
entries.append(entry)
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
# Multiple video article (e.g.
|
||||
# http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
|
||||
EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?' % self._ID_REGEX
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
|
||||
@@ -93,11 +93,11 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
|
||||
return formats
|
||||
|
||||
def _download_playinfo(self, video_id, cid):
|
||||
def _download_playinfo(self, video_id, cid, headers=None):
|
||||
return self._download_json(
|
||||
'https://api.bilibili.com/x/player/playurl', video_id,
|
||||
query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
|
||||
note=f'Downloading video formats for cid {cid}')['data']
|
||||
note=f'Downloading video formats for cid {cid}', headers=headers)['data']
|
||||
|
||||
def json2srt(self, json_data):
|
||||
srt_data = ''
|
||||
@@ -493,7 +493,8 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage, urlh = self._download_webpage_handle(url, video_id)
|
||||
headers = self.geo_verification_headers()
|
||||
webpage, urlh = self._download_webpage_handle(url, video_id, headers=headers)
|
||||
if not self._match_valid_url(urlh.url):
|
||||
return self.url_result(urlh.url)
|
||||
|
||||
@@ -531,7 +532,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
self._download_json(
|
||||
'https://api.bilibili.com/x/player/pagelist', video_id,
|
||||
fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
|
||||
note='Extracting videos in anthology'),
|
||||
note='Extracting videos in anthology', headers=headers),
|
||||
'data', expected_type=list) or []
|
||||
is_anthology = len(page_list_json) > 1
|
||||
|
||||
@@ -552,7 +553,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
|
||||
festival_info = {}
|
||||
if is_festival:
|
||||
play_info = self._download_playinfo(video_id, cid)
|
||||
play_info = self._download_playinfo(video_id, cid, headers=headers)
|
||||
|
||||
festival_info = traverse_obj(initial_state, {
|
||||
'uploader': ('videoInfo', 'upName'),
|
||||
@@ -666,14 +667,15 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
episode_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, episode_id)
|
||||
headers = self.geo_verification_headers()
|
||||
webpage = self._download_webpage(url, episode_id, headers=headers)
|
||||
|
||||
if '您所在的地区无法观看本片' in webpage:
|
||||
raise GeoRestrictedError('This video is restricted')
|
||||
elif '正在观看预览,大会员免费看全片' in webpage:
|
||||
self.raise_login_required('This video is for premium members only')
|
||||
|
||||
headers = {'Referer': url, **self.geo_verification_headers()}
|
||||
headers['Referer'] = url
|
||||
play_info = self._download_json(
|
||||
'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
|
||||
'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
|
||||
@@ -724,7 +726,7 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
|
||||
'__post_extractor': self.extract_comments(aid),
|
||||
'http_headers': headers,
|
||||
'http_headers': {'Referer': url},
|
||||
}
|
||||
|
||||
|
||||
@@ -1043,15 +1045,17 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
|
||||
|
||||
try:
|
||||
response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
|
||||
playlist_id, note=f'Downloading page {page_idx}', query=query)
|
||||
playlist_id, note=f'Downloading page {page_idx}', query=query,
|
||||
headers={'referer': url})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 412:
|
||||
raise ExtractorError(
|
||||
'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
|
||||
raise
|
||||
if response['code'] == -401:
|
||||
if response['code'] in (-352, -401):
|
||||
raise ExtractorError(
|
||||
'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
|
||||
f'Request is blocked by server ({-response["code"]}), '
|
||||
'please add cookies, wait and try later.', expected=True)
|
||||
return response['data']
|
||||
|
||||
def get_metadata(page_data):
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from .common import InfoExtractor
|
||||
from .amp import AMPIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
mimetype2ext,
|
||||
parse_duration,
|
||||
@@ -5,7 +6,6 @@ from ..utils import (
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
)
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class BloggerIE(InfoExtractor):
|
||||
|
||||
@@ -1,7 +1,11 @@
|
||||
import json
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
bug_reports_message,
|
||||
int_or_none,
|
||||
qualities,
|
||||
str_or_none,
|
||||
@@ -162,9 +166,19 @@ class BoostyIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
user, post_id = self._match_valid_url(url).group('user', 'post_id')
|
||||
|
||||
auth_headers = {}
|
||||
auth_cookie = self._get_cookies('https://boosty.to/').get('auth')
|
||||
if auth_cookie is not None:
|
||||
try:
|
||||
auth_data = json.loads(urllib.parse.unquote(auth_cookie.value))
|
||||
auth_headers['Authorization'] = f'Bearer {auth_data["accessToken"]}'
|
||||
except (json.JSONDecodeError, KeyError):
|
||||
self.report_warning(f'Failed to extract token from auth cookie{bug_reports_message()}')
|
||||
|
||||
post = self._download_json(
|
||||
f'https://api.boosty.to/v1/blog/{user}/post/{post_id}', post_id,
|
||||
note='Downloading post data', errnote='Unable to download post data')
|
||||
note='Downloading post data', errnote='Unable to download post data', headers=auth_headers)
|
||||
|
||||
post_title = post.get('title')
|
||||
if not post_title:
|
||||
@@ -202,7 +216,9 @@ class BoostyIE(InfoExtractor):
|
||||
'thumbnail': (('previewUrl', 'defaultPreview'), {url_or_none}),
|
||||
}, get_all=False)})
|
||||
|
||||
if not entries:
|
||||
if not entries and not post.get('hasAccess'):
|
||||
self.raise_login_required('This post requires a subscription', metadata_available=True)
|
||||
elif not entries:
|
||||
raise ExtractorError('No videos found', expected=True)
|
||||
if len(entries) == 1:
|
||||
return entries[0]
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
)
|
||||
|
||||
@@ -1,9 +1,5 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
traverse_obj,
|
||||
unified_timestamp
|
||||
)
|
||||
from ..utils import js_to_json, traverse_obj, unified_timestamp
|
||||
|
||||
|
||||
class BoxCastVideoIE(InfoExtractor):
|
||||
|
||||
@@ -6,7 +6,7 @@ from ..utils import (
|
||||
classproperty,
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
urljoin
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -12,10 +12,11 @@ from ..compat import (
|
||||
)
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
UnsupportedError,
|
||||
clean_html,
|
||||
dict_get,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
fix_xml_ampersands,
|
||||
float_or_none,
|
||||
@@ -29,7 +30,6 @@ from ..utils import (
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
UnsupportedError,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
@@ -27,8 +27,17 @@ class BrilliantpalaBaseIE(InfoExtractor):
|
||||
r'"username"\s*:\s*"(?P<username>[^"]+)"', webpage, 'logged-in username')
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
login_form = self._hidden_inputs(self._download_webpage(
|
||||
self._LOGIN_API, None, 'Downloading login page'))
|
||||
login_page, urlh = self._download_webpage_handle(
|
||||
self._LOGIN_API, None, 'Downloading login page', expected_status=401)
|
||||
if urlh.status != 401 and not urlh.url.startswith(self._LOGIN_API):
|
||||
self.write_debug('Cookies are valid, no login required.')
|
||||
return
|
||||
|
||||
if urlh.status == 401:
|
||||
self.write_debug('Got HTTP Error 401; cookies have been invalidated')
|
||||
login_page = self._download_webpage(self._LOGIN_API, None, 'Re-downloading login page')
|
||||
|
||||
login_form = self._hidden_inputs(login_page)
|
||||
login_form.update({
|
||||
'username': username,
|
||||
'password': password,
|
||||
|
||||
@@ -1,32 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class CableAVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://cableav\.tv/(?P<id>[a-zA-Z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://cableav.tv/lS4iR9lWjN8/',
|
||||
'md5': '7e3fe5e49d61c4233b7f5b0f69b15e18',
|
||||
'info_dict': {
|
||||
'id': 'lS4iR9lWjN8',
|
||||
'ext': 'mp4',
|
||||
'title': '國產麻豆AV 叮叮映畫 DDF001 情欲小說家 - CableAV',
|
||||
'description': '國產AV 480p, 720p 国产麻豆AV 叮叮映画 DDF001 情欲小说家',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = self._og_search_video_url(webpage, secure=False)
|
||||
|
||||
formats = self._extract_m3u8_formats(video_url, video_id, 'mp4')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'formats': formats,
|
||||
}
|
||||
74
yt_dlp/extractor/caffeinetv.py
Normal file
74
yt_dlp/extractor/caffeinetv.py
Normal file
@@ -0,0 +1,74 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class CaffeineTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?caffeine\.tv/[^/?#]+/video/(?P<id>[\da-f-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.caffeine.tv/TsuSurf/video/cffc0a00-e73f-11ec-8080-80017d29f26e',
|
||||
'info_dict': {
|
||||
'id': 'cffc0a00-e73f-11ec-8080-80017d29f26e',
|
||||
'ext': 'mp4',
|
||||
'title': 'GOOOOD MORNINNNNN #highlights',
|
||||
'timestamp': 1654702180,
|
||||
'upload_date': '20220608',
|
||||
'uploader': 'RahJON Wicc',
|
||||
'uploader_id': 'TsuSurf',
|
||||
'duration': 3145,
|
||||
'age_limit': 17,
|
||||
'thumbnail': 'https://www.caffeine.tv/broadcasts/776b6f84-9cd5-42e3-af1d-4a776eeed697/replay/lobby.jpg',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'tags': ['highlights', 'battlerap'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
json_data = self._download_json(
|
||||
f'https://api.caffeine.tv/social/public/activity/{video_id}', video_id)
|
||||
broadcast_info = traverse_obj(json_data, ('broadcast_info', {dict})) or {}
|
||||
|
||||
video_url = broadcast_info['video_url']
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'm3u8':
|
||||
formats = self._extract_m3u8_formats(video_url, video_id, 'mp4')
|
||||
else:
|
||||
formats = [{'url': video_url}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
**traverse_obj(json_data, {
|
||||
'like_count': ('like_count', {int_or_none}),
|
||||
'view_count': ('view_count', {int_or_none}),
|
||||
'comment_count': ('comment_count', {int_or_none}),
|
||||
'tags': ('tags', ..., {str}, {lambda x: x or None}),
|
||||
'uploader': ('user', 'name', {str}),
|
||||
'uploader_id': (((None, 'user'), 'username'), {str}, any),
|
||||
'is_live': ('is_live', {bool}),
|
||||
}),
|
||||
**traverse_obj(broadcast_info, {
|
||||
'title': ('broadcast_title', {str}),
|
||||
'duration': ('content_duration', {int_or_none}),
|
||||
'timestamp': ('broadcast_start_time', {parse_iso8601}),
|
||||
'thumbnail': ('preview_image_path', {lambda x: urljoin(url, x)}),
|
||||
}),
|
||||
'age_limit': {
|
||||
# assume Apple Store ratings: https://en.wikipedia.org/wiki/Mobile_software_content_rating_system
|
||||
'FOUR_PLUS': 0,
|
||||
'NINE_PLUS': 9,
|
||||
'TWELVE_PLUS': 12,
|
||||
'SEVENTEEN_PLUS': 17,
|
||||
}.get(broadcast_info.get('content_rating'), 17),
|
||||
}
|
||||
@@ -40,7 +40,7 @@ class CanalAlphaIE(InfoExtractor):
|
||||
'id': '24484',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ces innovations qui veulent rendre l’agriculture plus durable',
|
||||
'description': 'md5:3de3f151180684621e85be7c10e4e613',
|
||||
'description': 'md5:85d594a3b5dc6ccfc4a85aba6e73b129',
|
||||
'thumbnail': 'https://static.canalalpha.ch/poster/magazine/magazine_10236.jpg',
|
||||
'upload_date': '20211026',
|
||||
'duration': 360,
|
||||
@@ -58,14 +58,25 @@ class CanalAlphaIE(InfoExtractor):
|
||||
'duration': 360,
|
||||
},
|
||||
'params': {'skip_download': True}
|
||||
}, {
|
||||
'url': 'https://www.canalalpha.ch/play/le-journal/topic/33500/encore-des-mesures-deconomie-dans-le-jura',
|
||||
'info_dict': {
|
||||
'id': '33500',
|
||||
'ext': 'mp4',
|
||||
'title': 'Encore des mesures d\'économie dans le Jura',
|
||||
'description': 'md5:938b5b556592f2d1b9ab150268082a80',
|
||||
'thumbnail': 'https://static.canalalpha.ch/poster/news/news_46665.jpg',
|
||||
'upload_date': '20240411',
|
||||
'duration': 105,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, id)
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
data_json = self._parse_json(self._search_regex(
|
||||
r'window\.__SERVER_STATE__\s?=\s?({(?:(?!};)[^"]|"([^"]|\\")*")+})\s?;',
|
||||
webpage, 'data_json'), id)['1']['data']['data']
|
||||
webpage, 'data_json'), video_id)['1']['data']['data']
|
||||
manifests = try_get(data_json, lambda x: x['video']['manifests'], expected_type=dict) or {}
|
||||
subtitles = {}
|
||||
formats = [{
|
||||
@@ -75,15 +86,17 @@ class CanalAlphaIE(InfoExtractor):
|
||||
'height': try_get(video, lambda x: x['res']['height'], expected_type=int),
|
||||
} for video in try_get(data_json, lambda x: x['video']['mp4'], expected_type=list) or [] if video.get('$url')]
|
||||
if manifests.get('hls'):
|
||||
m3u8_frmts, m3u8_subs = self._parse_m3u8_formats_and_subtitles(manifests['hls'], video_id=id)
|
||||
formats.extend(m3u8_frmts)
|
||||
subtitles = self._merge_subtitles(subtitles, m3u8_subs)
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
manifests['hls'], video_id, m3u8_id='hls', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
if manifests.get('dash'):
|
||||
dash_frmts, dash_subs = self._parse_mpd_formats_and_subtitles(manifests['dash'])
|
||||
formats.extend(dash_frmts)
|
||||
subtitles = self._merge_subtitles(subtitles, dash_subs)
|
||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||
manifests['dash'], video_id, mpd_id='dash', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
return {
|
||||
'id': id,
|
||||
'id': video_id,
|
||||
'title': data_json.get('title').strip(),
|
||||
'description': clean_html(dict_get(data_json, ('longDesc', 'shortDesc'))),
|
||||
'thumbnail': data_json.get('poster'),
|
||||
|
||||
@@ -151,7 +151,7 @@ class CBCIE(InfoExtractor):
|
||||
|
||||
class CBCPlayerIE(InfoExtractor):
|
||||
IE_NAME = 'cbc.ca:player'
|
||||
_VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>(?:\d\.)?\d+)'
|
||||
_VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/(?:video/)?|i/caffeine/syndicate/\?mediaId=))(?P<id>(?:\d\.)?\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cbc.ca/player/play/2683190193',
|
||||
'md5': '64d25f841ddf4ddb28a235338af32e2c',
|
||||
@@ -277,6 +277,28 @@ class CBCPlayerIE(InfoExtractor):
|
||||
'location': 'Canada',
|
||||
'media_type': 'Full Program',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.cbc.ca/player/play/video/1.7194274',
|
||||
'md5': '188b96cf6bdcb2540e178a6caa957128',
|
||||
'info_dict': {
|
||||
'id': '2334524995812',
|
||||
'ext': 'mp4',
|
||||
'title': '#TheMoment a rare white spirit moose was spotted in Alberta',
|
||||
'description': 'md5:18ae269a2d0265c5b0bbe4b2e1ac61a3',
|
||||
'timestamp': 1714788791,
|
||||
'duration': 77.678,
|
||||
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
|
||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/201/543/THE_MOMENT.jpg',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'chapters': 'count:0',
|
||||
'upload_date': '20240504',
|
||||
'categories': 'count:3',
|
||||
'series': 'The National',
|
||||
'tags': 'count:15',
|
||||
'creators': ['encoder'],
|
||||
'location': 'Canada',
|
||||
'media_type': 'Excerpt',
|
||||
},
|
||||
}, {
|
||||
'url': 'cbcplayer:1.7159484',
|
||||
'only_matching': True,
|
||||
|
||||
@@ -5,14 +5,14 @@ from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
find_xpath_attr,
|
||||
get_element_html_by_id,
|
||||
int_or_none,
|
||||
find_xpath_attr,
|
||||
smuggle_url,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -16,7 +16,6 @@ from ..utils import (
|
||||
merge_dicts,
|
||||
multipart_encode,
|
||||
parse_duration,
|
||||
random_birthday,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
try_get,
|
||||
@@ -63,38 +62,57 @@ class CDAIE(InfoExtractor):
|
||||
'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'crash404',
|
||||
'view_count': int,
|
||||
'average_rating': float,
|
||||
'duration': 137,
|
||||
'age_limit': 0,
|
||||
'upload_date': '20160220',
|
||||
'timestamp': 1455968218,
|
||||
}
|
||||
}, {
|
||||
# Age-restricted
|
||||
'url': 'http://www.cda.pl/video/1273454c4',
|
||||
# Age-restricted with vfilm redirection
|
||||
'url': 'https://www.cda.pl/video/8753244c4',
|
||||
'md5': 'd8eeb83d63611289507010d3df3bb8b3',
|
||||
'info_dict': {
|
||||
'id': '1273454c4',
|
||||
'id': '8753244c4',
|
||||
'ext': 'mp4',
|
||||
'title': 'Bronson (2008) napisy HD 1080p',
|
||||
'description': 'md5:1b6cb18508daf2dc4e0fa4db77fec24c',
|
||||
'title': '[18+] Bez Filtra: Rezerwowe Psy czyli... najwulgarniejsza polska gra?',
|
||||
'description': 'md5:ae80bac31bd6a9f077a6cce03c7c077e',
|
||||
'height': 1080,
|
||||
'uploader': 'boniek61',
|
||||
'uploader': 'arhn eu',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 5554,
|
||||
'duration': 991,
|
||||
'age_limit': 18,
|
||||
'view_count': int,
|
||||
'average_rating': float,
|
||||
},
|
||||
'timestamp': 1633888264,
|
||||
'upload_date': '20211010',
|
||||
}
|
||||
}, {
|
||||
# Age-restricted without vfilm redirection
|
||||
'url': 'https://www.cda.pl/video/17028157b8',
|
||||
'md5': 'c1fe5ff4582bace95d4f0ce0fbd0f992',
|
||||
'info_dict': {
|
||||
'id': '17028157b8',
|
||||
'ext': 'mp4',
|
||||
'title': 'STENDUPY MICHAŁ OGIŃSKI',
|
||||
'description': 'md5:5851f3272bfc31f762d616040a1d609a',
|
||||
'height': 480,
|
||||
'uploader': 'oginski',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 18855,
|
||||
'age_limit': 18,
|
||||
'average_rating': float,
|
||||
'timestamp': 1699705901,
|
||||
'upload_date': '20231111',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://ebd.cda.pl/0x0/5749950c',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _download_age_confirm_page(self, url, video_id, *args, **kwargs):
|
||||
form_data = random_birthday('rok', 'miesiac', 'dzien')
|
||||
form_data.update({'return': url, 'module': 'video', 'module_id': video_id})
|
||||
data, content_type = multipart_encode(form_data)
|
||||
data, content_type = multipart_encode({'age_confirm': ''})
|
||||
return self._download_webpage(
|
||||
urljoin(url, '/a/validatebirth'), video_id, *args,
|
||||
url, video_id, *args,
|
||||
data=data, headers={
|
||||
'Referer': url,
|
||||
'Content-Type': content_type,
|
||||
@@ -164,7 +182,7 @@ class CDAIE(InfoExtractor):
|
||||
if 'Authorization' in self._API_HEADERS:
|
||||
return self._api_extract(video_id)
|
||||
else:
|
||||
return self._web_extract(video_id, url)
|
||||
return self._web_extract(video_id)
|
||||
|
||||
def _api_extract(self, video_id):
|
||||
meta = self._download_json(
|
||||
@@ -197,9 +215,9 @@ class CDAIE(InfoExtractor):
|
||||
'view_count': meta.get('views'),
|
||||
}
|
||||
|
||||
def _web_extract(self, video_id, url):
|
||||
def _web_extract(self, video_id):
|
||||
self._set_cookie('cda.pl', 'cda.player', 'html5')
|
||||
webpage = self._download_webpage(
|
||||
webpage, urlh = self._download_webpage_handle(
|
||||
f'{self._BASE_URL}/video/{video_id}/vfilm', video_id)
|
||||
|
||||
if 'Ten film jest dostępny dla użytkowników premium' in webpage:
|
||||
@@ -209,10 +227,10 @@ class CDAIE(InfoExtractor):
|
||||
self.raise_geo_restricted()
|
||||
|
||||
need_confirm_age = False
|
||||
if self._html_search_regex(r'(<form[^>]+action="[^"]*/a/validatebirth[^"]*")',
|
||||
if self._html_search_regex(r'(<button[^>]+name="[^"]*age_confirm[^"]*")',
|
||||
webpage, 'birthday validate form', default=None):
|
||||
webpage = self._download_age_confirm_page(
|
||||
url, video_id, note='Confirming age')
|
||||
urlh.url, video_id, note='Confirming age')
|
||||
need_confirm_age = True
|
||||
|
||||
formats = []
|
||||
@@ -222,9 +240,6 @@ class CDAIE(InfoExtractor):
|
||||
(?:<\1[^>]*>[^<]*</\1>|(?!</\1>)(?:.|\n))*?
|
||||
<(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3>
|
||||
''', webpage, 'uploader', default=None, group='uploader')
|
||||
view_count = self._search_regex(
|
||||
r'Odsłony:(?:\s| )*([0-9]+)', webpage,
|
||||
'view_count', default=None)
|
||||
average_rating = self._search_regex(
|
||||
(r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
|
||||
r'<span[^>]+\bclass=["\']rating["\'][^>]*>(?P<rating_value>[0-9.]+)'), webpage, 'rating', fatal=False,
|
||||
@@ -235,7 +250,6 @@ class CDAIE(InfoExtractor):
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'uploader': uploader,
|
||||
'view_count': int_or_none(view_count),
|
||||
'average_rating': float_or_none(average_rating),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'formats': formats,
|
||||
|
||||
@@ -101,7 +101,7 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize')
|
||||
playlist_title = self._og_search_title(webpage, default=None)
|
||||
if site_name and playlist_title:
|
||||
playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, 1)[0]
|
||||
playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, maxsplit=1)[0]
|
||||
playlist_description = self._og_search_description(webpage, default=None)
|
||||
if playlist_description:
|
||||
playlist_description = playlist_description.replace('\xa0', ' ')
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
)
|
||||
|
||||
import re
|
||||
|
||||
|
||||
class ClippitIE(InfoExtractor):
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import base64
|
||||
import collections
|
||||
import functools
|
||||
import getpass
|
||||
import hashlib
|
||||
import http.client
|
||||
@@ -21,7 +22,6 @@ import urllib.parse
|
||||
import urllib.request
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from ..compat import functools # isort: split
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_expanduser,
|
||||
@@ -957,7 +957,8 @@ class InfoExtractor:
|
||||
if urlh is False:
|
||||
assert not fatal
|
||||
return False
|
||||
content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal, encoding=encoding)
|
||||
content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal,
|
||||
encoding=encoding, data=data)
|
||||
return (content, urlh)
|
||||
|
||||
@staticmethod
|
||||
@@ -1005,8 +1006,10 @@ class InfoExtractor:
|
||||
'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
|
||||
expected=True)
|
||||
|
||||
def _request_dump_filename(self, url, video_id):
|
||||
basen = f'{video_id}_{url}'
|
||||
def _request_dump_filename(self, url, video_id, data=None):
|
||||
if data is not None:
|
||||
data = hashlib.md5(data).hexdigest()
|
||||
basen = join_nonempty(video_id, data, url, delim='_')
|
||||
trim_length = self.get_param('trim_file_name') or 240
|
||||
if len(basen) > trim_length:
|
||||
h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
|
||||
@@ -1028,7 +1031,8 @@ class InfoExtractor:
|
||||
except LookupError:
|
||||
return webpage_bytes.decode('utf-8', 'replace')
|
||||
|
||||
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
|
||||
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True,
|
||||
prefix=None, encoding=None, data=None):
|
||||
webpage_bytes = urlh.read()
|
||||
if prefix is not None:
|
||||
webpage_bytes = prefix + webpage_bytes
|
||||
@@ -1037,7 +1041,9 @@ class InfoExtractor:
|
||||
dump = base64.b64encode(webpage_bytes).decode('ascii')
|
||||
self._downloader.to_screen(dump)
|
||||
if self.get_param('write_pages'):
|
||||
filename = self._request_dump_filename(urlh.url, video_id)
|
||||
if isinstance(url_or_request, Request):
|
||||
data = self._create_request(url_or_request, data).data
|
||||
filename = self._request_dump_filename(urlh.url, video_id, data)
|
||||
self.to_screen(f'Saving request to {filename}')
|
||||
with open(filename, 'wb') as outf:
|
||||
outf.write(webpage_bytes)
|
||||
@@ -1098,7 +1104,7 @@ class InfoExtractor:
|
||||
impersonate=None, require_impersonation=False):
|
||||
if self.get_param('load_pages'):
|
||||
url_or_request = self._create_request(url_or_request, data, headers, query)
|
||||
filename = self._request_dump_filename(url_or_request.url, video_id)
|
||||
filename = self._request_dump_filename(url_or_request.url, video_id, url_or_request.data)
|
||||
self.to_screen(f'Loading request from {filename}')
|
||||
try:
|
||||
with open(filename, 'rb') as dumpf:
|
||||
@@ -2445,7 +2451,7 @@ class InfoExtractor:
|
||||
})
|
||||
continue
|
||||
|
||||
src_url = src if src.startswith('http') else urllib.parse.urljoin(base, src)
|
||||
src_url = src if src.startswith('http') else urllib.parse.urljoin(f'{base}/', src)
|
||||
src_url = src_url.strip()
|
||||
|
||||
if proto == 'm3u8' or src_ext == 'm3u8':
|
||||
@@ -3378,23 +3384,16 @@ class InfoExtractor:
|
||||
return formats
|
||||
|
||||
def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
|
||||
mobj = re.search(
|
||||
r'''(?s)jwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?!</script>).*?\.\s*setup\s*\(\s*(?P<options>(?:\([^)]*\)|[^)])+)\s*\)''',
|
||||
webpage)
|
||||
if mobj:
|
||||
try:
|
||||
jwplayer_data = self._parse_json(mobj.group('options'),
|
||||
video_id=video_id,
|
||||
transform_source=transform_source)
|
||||
except ExtractorError:
|
||||
pass
|
||||
else:
|
||||
if isinstance(jwplayer_data, dict):
|
||||
return jwplayer_data
|
||||
return self._search_json(
|
||||
r'''(?<!-)\bjwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?:(?!</script>).)*?\.\s*(?:setup\s*\(|(?P<load>load)\s*\(\s*\[)''',
|
||||
webpage, 'JWPlayer data', video_id,
|
||||
# must be a {...} or sequence, ending
|
||||
contains_pattern=r'\{(?s:.*)}(?(load)(?:\s*,\s*\{(?s:.*)})*)', end_pattern=r'(?(load)\]|\))',
|
||||
transform_source=transform_source, default=None)
|
||||
|
||||
def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
|
||||
def _extract_jwplayer_data(self, webpage, video_id, *args, transform_source=js_to_json, **kwargs):
|
||||
jwplayer_data = self._find_jwplayer_data(
|
||||
webpage, video_id, transform_source=js_to_json)
|
||||
webpage, video_id, transform_source=transform_source)
|
||||
return self._parse_jwplayer_data(
|
||||
jwplayer_data, video_id, *args, **kwargs)
|
||||
|
||||
@@ -3426,22 +3425,14 @@ class InfoExtractor:
|
||||
mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)
|
||||
|
||||
subtitles = {}
|
||||
tracks = video_data.get('tracks')
|
||||
if tracks and isinstance(tracks, list):
|
||||
for track in tracks:
|
||||
if not isinstance(track, dict):
|
||||
continue
|
||||
track_kind = track.get('kind')
|
||||
if not track_kind or not isinstance(track_kind, str):
|
||||
continue
|
||||
if track_kind.lower() not in ('captions', 'subtitles'):
|
||||
continue
|
||||
track_url = urljoin(base_url, track.get('file'))
|
||||
if not track_url:
|
||||
continue
|
||||
subtitles.setdefault(track.get('label') or 'en', []).append({
|
||||
'url': self._proto_relative_url(track_url)
|
||||
})
|
||||
for track in traverse_obj(video_data, (
|
||||
'tracks', lambda _, v: v['kind'].lower() in ('captions', 'subtitles'))):
|
||||
track_url = urljoin(base_url, track.get('file'))
|
||||
if not track_url:
|
||||
continue
|
||||
subtitles.setdefault(track.get('label') or 'en', []).append({
|
||||
'url': self._proto_relative_url(track_url)
|
||||
})
|
||||
|
||||
entry = {
|
||||
'id': this_video_id,
|
||||
@@ -3526,7 +3517,7 @@ class InfoExtractor:
|
||||
# See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
|
||||
# of jwplayer.flash.swf
|
||||
rtmp_url_parts = re.split(
|
||||
r'((?:mp4|mp3|flv):)', source_url, 1)
|
||||
r'((?:mp4|mp3|flv):)', source_url, maxsplit=1)
|
||||
if len(rtmp_url_parts) == 3:
|
||||
rtmp_url, prefix, play_path = rtmp_url_parts
|
||||
a_format.update({
|
||||
|
||||
@@ -40,3 +40,19 @@ class UnicodeBOMIE(InfoExtractor):
|
||||
'Your URL starts with a Byte Order Mark (BOM). '
|
||||
'Removing the BOM and looking for "%s" ...' % real_url)
|
||||
return self.url_result(real_url)
|
||||
|
||||
|
||||
class BlobIE(InfoExtractor):
|
||||
IE_DESC = False
|
||||
_VALID_URL = r'blob:'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'blob:https://www.youtube.com/4eb3d090-a761-46e6-8083-c32016a36e3b',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
raise ExtractorError(
|
||||
'You\'ve asked yt-dlp to download a blob URL. '
|
||||
'A blob URL exists only locally in your browser. '
|
||||
'It is not possible for yt-dlp to access it.', expected=True)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from .theplatform import ThePlatformFeedIE
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
dict_get,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
@@ -6,6 +6,7 @@ import time
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
@@ -13,7 +14,6 @@ from ..utils import (
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
url_or_none,
|
||||
ExtractorError
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@ import base64
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import Request
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
@@ -24,6 +25,7 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
_BASE_URL = 'https://www.crunchyroll.com'
|
||||
_API_BASE = 'https://api.crunchyroll.com'
|
||||
_NETRC_MACHINE = 'crunchyroll'
|
||||
_SWITCH_USER_AGENT = 'Crunchyroll/1.8.0 Nintendo Switch/12.3.12.0 UE4/4.27'
|
||||
_REFRESH_TOKEN = None
|
||||
_AUTH_HEADERS = None
|
||||
_AUTH_EXPIRY = None
|
||||
@@ -53,15 +55,19 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
CrunchyrollBaseIE._AUTH_EXPIRY = time_seconds(seconds=traverse_obj(response, ('expires_in', {float_or_none}), default=300) - 10)
|
||||
|
||||
def _request_token(self, headers, data, note='Requesting token', errnote='Failed to request token'):
|
||||
try: # TODO: Add impersonation support here
|
||||
try:
|
||||
return self._download_json(
|
||||
f'{self._BASE_URL}/auth/v1/token', None, note=note, errnote=errnote,
|
||||
headers=headers, data=urlencode_postdata(data))
|
||||
headers=headers, data=urlencode_postdata(data), impersonate=True)
|
||||
except ExtractorError as error:
|
||||
if not isinstance(error.cause, HTTPError) or error.cause.status != 403:
|
||||
raise
|
||||
if target := error.cause.response.extensions.get('impersonate'):
|
||||
raise ExtractorError(f'Got HTTP Error 403 when using impersonate target "{target}"')
|
||||
raise ExtractorError(
|
||||
'Request blocked by Cloudflare; navigate to Crunchyroll in your browser, '
|
||||
'Request blocked by Cloudflare. '
|
||||
'Install the required impersonation dependency if possible, '
|
||||
'or else navigate to Crunchyroll in your browser, '
|
||||
'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
|
||||
'and your browser\'s User-Agent (with --user-agent)', expected=True)
|
||||
|
||||
@@ -175,10 +181,19 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
display_id = identifier
|
||||
|
||||
self._update_auth()
|
||||
stream_response = self._download_json(
|
||||
f'https://cr-play-service.prd.crunchyrollsvc.com/v1/{identifier}/console/switch/play',
|
||||
display_id, note='Downloading stream info', errnote='Failed to download stream info',
|
||||
headers=CrunchyrollBaseIE._AUTH_HEADERS)
|
||||
headers = {**CrunchyrollBaseIE._AUTH_HEADERS, 'User-Agent': self._SWITCH_USER_AGENT}
|
||||
try:
|
||||
stream_response = self._download_json(
|
||||
f'https://cr-play-service.prd.crunchyrollsvc.com/v1/{identifier}/console/switch/play',
|
||||
display_id, note='Downloading stream info', errnote='Failed to download stream info', headers=headers)
|
||||
except ExtractorError as error:
|
||||
if self.get_param('ignore_no_formats_error'):
|
||||
self.report_warning(error.orig_msg)
|
||||
return [], {}
|
||||
elif isinstance(error.cause, HTTPError) and error.cause.status == 420:
|
||||
raise ExtractorError(
|
||||
'You have reached the rate-limit for active streams; try again later', expected=True)
|
||||
raise
|
||||
|
||||
available_formats = {'': ('', '', stream_response['url'])}
|
||||
for hardsub_lang, stream in traverse_obj(stream_response, ('hardSubs', {dict.items}, lambda _, v: v[1]['url'])):
|
||||
@@ -207,7 +222,7 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
fatal=False, note=f'Downloading {f"{format_id} " if hardsub_lang else ""}MPD manifest')
|
||||
self._merge_subtitles(dash_subs, target=subtitles)
|
||||
else:
|
||||
continue # XXX: Update this if/when meta mpd formats are working
|
||||
continue # XXX: Update this if meta mpd formats work; will be tricky with token invalidation
|
||||
for f in adaptive_formats:
|
||||
if f.get('acodec') != 'none':
|
||||
f['language'] = audio_locale
|
||||
@@ -217,6 +232,15 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
for locale, subtitle in traverse_obj(stream_response, (('subtitles', 'captions'), {dict.items}, ...)):
|
||||
subtitles.setdefault(locale, []).append(traverse_obj(subtitle, {'url': 'url', 'ext': 'format'}))
|
||||
|
||||
# Invalidate stream token to avoid rate-limit
|
||||
error_msg = 'Unable to invalidate stream token; you may experience rate-limiting'
|
||||
if stream_token := stream_response.get('token'):
|
||||
self._request_webpage(Request(
|
||||
f'https://cr-play-service.prd.crunchyrollsvc.com/v1/token/{identifier}/{stream_token}/inactive',
|
||||
headers=headers, method='PATCH'), display_id, 'Invalidating stream token', error_msg, fatal=False)
|
||||
else:
|
||||
self.report_warning(error_msg)
|
||||
|
||||
return formats, subtitles
|
||||
|
||||
|
||||
@@ -394,10 +418,11 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
|
||||
if not self._IS_PREMIUM and traverse_obj(response, (f'{object_type}_metadata', 'is_premium_only')):
|
||||
message = f'This {object_type} is for premium members only'
|
||||
if CrunchyrollBaseIE._REFRESH_TOKEN:
|
||||
raise ExtractorError(message, expected=True)
|
||||
self.raise_login_required(message, method='password')
|
||||
|
||||
result['formats'], result['subtitles'] = self._extract_stream(internal_id)
|
||||
self.raise_no_formats(message, expected=True, video_id=internal_id)
|
||||
else:
|
||||
self.raise_login_required(message, method='password', metadata_available=True)
|
||||
else:
|
||||
result['formats'], result['subtitles'] = self._extract_stream(internal_id)
|
||||
|
||||
result['chapters'] = self._extract_chapters(internal_id)
|
||||
|
||||
@@ -583,14 +608,16 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
|
||||
if not response:
|
||||
raise ExtractorError(f'No video with id {internal_id} could be found (possibly region locked?)', expected=True)
|
||||
|
||||
result = self._transform_music_response(response)
|
||||
|
||||
if not self._IS_PREMIUM and response.get('isPremiumOnly'):
|
||||
message = f'This {response.get("type") or "media"} is for premium members only'
|
||||
if CrunchyrollBaseIE._REFRESH_TOKEN:
|
||||
raise ExtractorError(message, expected=True)
|
||||
self.raise_login_required(message, method='password')
|
||||
|
||||
result = self._transform_music_response(response)
|
||||
result['formats'], _ = self._extract_stream(f'music/{internal_id}', internal_id)
|
||||
self.raise_no_formats(message, expected=True, video_id=internal_id)
|
||||
else:
|
||||
self.raise_login_required(message, method='password', metadata_available=True)
|
||||
else:
|
||||
result['formats'], _ = self._extract_stream(f'music/{internal_id}', internal_id)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .senategov import SenateISVPIE
|
||||
from .ustream import UstreamIE
|
||||
from ..compat import compat_HTMLParseError
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
find_xpath_attr,
|
||||
get_element_by_attribute,
|
||||
@@ -19,8 +21,6 @@ from ..utils import (
|
||||
str_to_int,
|
||||
unescapeHTML,
|
||||
)
|
||||
from .senategov import SenateISVPIE
|
||||
from .ustream import UstreamIE
|
||||
|
||||
|
||||
class CSpanIE(InfoExtractor):
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_timestamp
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import unified_timestamp
|
||||
|
||||
|
||||
class CtsNewsIE(InfoExtractor):
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
determine_protocol,
|
||||
int_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError, clean_html, int_or_none, try_get, unified_strdate
|
||||
from ..compat import compat_str
|
||||
from ..utils import ExtractorError, clean_html, int_or_none, try_get, unified_strdate
|
||||
|
||||
|
||||
class DamtomoBaseIE(InfoExtractor):
|
||||
|
||||
197
yt_dlp/extractor/dangalplay.py
Normal file
197
yt_dlp/extractor/dangalplay.py
Normal file
@@ -0,0 +1,197 @@
|
||||
import hashlib
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import ExtractorError, int_or_none, join_nonempty, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class DangalPlayBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'dangalplay'
|
||||
_OTV_USER_ID = None
|
||||
_LOGIN_HINT = 'Pass credentials as -u "token" -p "USER_ID" where USER_ID is the `otv_user_id` in browser local storage'
|
||||
_API_BASE = 'https://ottapi.dangalplay.com'
|
||||
_AUTH_TOKEN = 'jqeGWxRKK7FK5zEk3xCM' # from https://www.dangalplay.com/main.48ad19e24eb46acccef3.js
|
||||
_SECRET_KEY = 'f53d31a4377e4ef31fa0' # same as above
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
if self._OTV_USER_ID:
|
||||
return
|
||||
if username != 'token' or not re.fullmatch(r'[\da-f]{32}', password):
|
||||
raise ExtractorError(self._LOGIN_HINT, expected=True)
|
||||
self._OTV_USER_ID = password
|
||||
|
||||
def _real_initialize(self):
|
||||
if not self._OTV_USER_ID:
|
||||
self.raise_login_required(f'Login required. {self._LOGIN_HINT}', method=None)
|
||||
|
||||
def _extract_episode_info(self, metadata, episode_slug, series_slug):
|
||||
return {
|
||||
'display_id': episode_slug,
|
||||
'episode_number': int_or_none(self._search_regex(
|
||||
r'ep-(?:number-)?(\d+)', episode_slug, 'episode number', default=None)),
|
||||
'season_number': int_or_none(self._search_regex(
|
||||
r'season-(\d+)', series_slug, 'season number', default='1')),
|
||||
'series': series_slug,
|
||||
**traverse_obj(metadata, {
|
||||
'id': ('content_id', {str}),
|
||||
'title': ('display_title', {str}),
|
||||
'episode': ('title', {str}),
|
||||
'series': ('show_name', {str}, {lambda x: x or None}),
|
||||
'series_id': ('catalog_id', {str}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'release_timestamp': ('release_date_uts', {int_or_none}),
|
||||
}),
|
||||
}
|
||||
|
||||
def _call_api(self, path, display_id, note='Downloading JSON metadata', fatal=True, query={}):
|
||||
return self._download_json(
|
||||
f'{self._API_BASE}/{path}', display_id, note, fatal=fatal,
|
||||
headers={'Accept': 'application/json'}, query={
|
||||
'auth_token': self._AUTH_TOKEN,
|
||||
'region': 'IN',
|
||||
**query,
|
||||
})
|
||||
|
||||
|
||||
class DangalPlayIE(DangalPlayBaseIE):
|
||||
IE_NAME = 'dangalplay'
|
||||
_VALID_URL = r'https?://(?:www\.)?dangalplay.com/shows/(?P<series>[^/?#]+)/(?P<id>(?!episodes)[^/?#]+)/?(?:$|[?#])'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dangalplay.com/shows/kitani-mohabbat-hai-season-2/kitani-mohabbat-hai-season-2-ep-number-01',
|
||||
'info_dict': {
|
||||
'id': '647c61dc1e7171310dcd49b4',
|
||||
'ext': 'mp4',
|
||||
'release_timestamp': 1262304000,
|
||||
'episode_number': 1,
|
||||
'episode': 'EP 1 | KITANI MOHABBAT HAI SEASON 2',
|
||||
'series': 'kitani-mohabbat-hai-season-2',
|
||||
'season_number': 2,
|
||||
'title': 'EP 1 | KITANI MOHABBAT HAI SEASON 2',
|
||||
'release_date': '20100101',
|
||||
'duration': 2325,
|
||||
'season': 'Season 2',
|
||||
'display_id': 'kitani-mohabbat-hai-season-2-ep-number-01',
|
||||
'series_id': '645c9ea41e717158ca574966',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.dangalplay.com/shows/milke-bhi-hum-na-mile/milke-bhi-hum-na-mile-ep-number-01',
|
||||
'info_dict': {
|
||||
'id': '65d31d9ba73b9c3abd14a7f3',
|
||||
'ext': 'mp4',
|
||||
'episode': 'EP 1 | MILKE BHI HUM NA MILE',
|
||||
'release_timestamp': 1708367411,
|
||||
'episode_number': 1,
|
||||
'season': 'Season 1',
|
||||
'title': 'EP 1 | MILKE BHI HUM NA MILE',
|
||||
'duration': 156048,
|
||||
'release_date': '20240219',
|
||||
'season_number': 1,
|
||||
'series': 'MILKE BHI HUM NA MILE',
|
||||
'series_id': '645c9ea41e717158ca574966',
|
||||
'display_id': 'milke-bhi-hum-na-mile-ep-number-01',
|
||||
},
|
||||
}]
|
||||
|
||||
def _generate_api_data(self, data):
|
||||
catalog_id = data['catalog_id']
|
||||
content_id = data['content_id']
|
||||
timestamp = str(int(time.time()))
|
||||
unhashed = ''.join((catalog_id, content_id, self._OTV_USER_ID, timestamp, self._SECRET_KEY))
|
||||
|
||||
return json.dumps({
|
||||
'catalog_id': catalog_id,
|
||||
'content_id': content_id,
|
||||
'category': '',
|
||||
'region': 'IN',
|
||||
'auth_token': self._AUTH_TOKEN,
|
||||
'id': self._OTV_USER_ID,
|
||||
'md5': hashlib.md5(unhashed.encode()).hexdigest(),
|
||||
'ts': timestamp,
|
||||
}, separators=(',', ':')).encode()
|
||||
|
||||
def _real_extract(self, url):
|
||||
series_slug, episode_slug = self._match_valid_url(url).group('series', 'id')
|
||||
metadata = self._call_api(
|
||||
f'catalogs/shows/{series_slug}/episodes/{episode_slug}.gzip',
|
||||
episode_slug, query={'item_language': ''})['data']
|
||||
|
||||
try:
|
||||
details = self._download_json(
|
||||
f'{self._API_BASE}/v2/users/get_all_details.gzip', episode_slug,
|
||||
'Downloading playback details JSON', headers={
|
||||
'Accept': 'application/json',
|
||||
'Content-Type': 'application/json',
|
||||
}, data=self._generate_api_data(metadata))['data']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 422:
|
||||
error_info = traverse_obj(e.cause.response.read().decode(), ({json.loads}, 'error', {dict})) or {}
|
||||
if error_info.get('code') == '1016':
|
||||
self.raise_login_required(
|
||||
f'Your token has expired or is invalid. {self._LOGIN_HINT}', method=None)
|
||||
elif msg := error_info.get('message'):
|
||||
raise ExtractorError(msg)
|
||||
raise
|
||||
|
||||
m3u8_url = traverse_obj(details, (
|
||||
('adaptive_url', ('adaptive_urls', 'hd', 'hls', ..., 'playback_url')), {url_or_none}, any))
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, episode_slug, 'mp4')
|
||||
|
||||
return {
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
**self._extract_episode_info(metadata, episode_slug, series_slug),
|
||||
}
|
||||
|
||||
|
||||
class DangalPlaySeasonIE(DangalPlayBaseIE):
|
||||
IE_NAME = 'dangalplay:season'
|
||||
_VALID_URL = r'https?://(?:www\.)?dangalplay.com/shows/(?P<id>[^/?#]+)(?:/(?P<sub>ep-[^/?#]+)/episodes)?/?(?:$|[?#])'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dangalplay.com/shows/kitani-mohabbat-hai-season-1',
|
||||
'playlist_mincount': 170,
|
||||
'info_dict': {
|
||||
'id': 'kitani-mohabbat-hai-season-1',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.dangalplay.com/shows/kitani-mohabbat-hai-season-1/ep-01-30-1/episodes',
|
||||
'playlist_count': 30,
|
||||
'info_dict': {
|
||||
'id': 'kitani-mohabbat-hai-season-1-ep-01-30-1',
|
||||
},
|
||||
}, {
|
||||
# 1 season only, series page is season page
|
||||
'url': 'https://www.dangalplay.com/shows/milke-bhi-hum-na-mile',
|
||||
'playlist_mincount': 15,
|
||||
'info_dict': {
|
||||
'id': 'milke-bhi-hum-na-mile',
|
||||
},
|
||||
}]
|
||||
|
||||
def _entries(self, subcategories, series_slug):
|
||||
for subcategory in subcategories:
|
||||
data = self._call_api(
|
||||
f'catalogs/shows/items/{series_slug}/subcategories/{subcategory}/episodes.gzip',
|
||||
series_slug, f'Downloading episodes JSON for {subcategory}', fatal=False, query={
|
||||
'order_by': 'asc',
|
||||
'status': 'published',
|
||||
})
|
||||
for ep in traverse_obj(data, ('data', 'items', lambda _, v: v['friendly_id'])):
|
||||
episode_slug = ep['friendly_id']
|
||||
yield self.url_result(
|
||||
f'https://www.dangalplay.com/shows/{series_slug}/{episode_slug}',
|
||||
DangalPlayIE, **self._extract_episode_info(ep, episode_slug, series_slug))
|
||||
|
||||
def _real_extract(self, url):
|
||||
series_slug, subcategory = self._match_valid_url(url).group('id', 'sub')
|
||||
subcategories = [subcategory] if subcategory else traverse_obj(
|
||||
self._call_api(
|
||||
f'catalogs/shows/items/{series_slug}.gzip', series_slug,
|
||||
'Downloading season info JSON', query={'item_language': ''}),
|
||||
('data', 'subcategories', ..., 'friendly_id', {str}))
|
||||
|
||||
return self.playlist_result(
|
||||
self._entries(subcategories, series_slug), join_nonempty(series_slug, subcategory))
|
||||
@@ -1,11 +1,11 @@
|
||||
import re
|
||||
import os.path
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
url_basename,
|
||||
remove_start,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_resolution,
|
||||
|
||||
@@ -2,9 +2,9 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
remove_end,
|
||||
|
||||
@@ -2,10 +2,10 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
unified_strdate,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import time
|
||||
import hashlib
|
||||
import time
|
||||
import urllib
|
||||
import uuid
|
||||
|
||||
|
||||
@@ -4,8 +4,8 @@ import uuid
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
remove_start,
|
||||
@@ -355,12 +355,10 @@ class DiscoveryPlusBaseIE(DPlayBaseIE):
|
||||
video_id, headers=headers, data=json.dumps({
|
||||
'deviceInfo': {
|
||||
'adBlocker': False,
|
||||
'drmSupported': False,
|
||||
},
|
||||
'videoId': video_id,
|
||||
'wisteriaProperties': {
|
||||
'platform': 'desktop',
|
||||
'product': self._PRODUCT,
|
||||
},
|
||||
'wisteriaProperties': {},
|
||||
}).encode('utf-8'))['data']['attributes']['streaming']
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -878,10 +876,31 @@ class DiscoveryPlusIndiaIE(DiscoveryPlusBaseIE):
|
||||
})
|
||||
|
||||
|
||||
class DiscoveryNetworksDeIE(DPlayBaseIE):
|
||||
class DiscoveryNetworksDeIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://dmax.de/sendungen/goldrausch-in-australien/german-gold',
|
||||
'info_dict': {
|
||||
'id': '4756322',
|
||||
'ext': 'mp4',
|
||||
'title': 'German Gold',
|
||||
'description': 'md5:f3073306553a8d9b40e6ac4cdbf09fc6',
|
||||
'display_id': 'goldrausch-in-australien/german-gold',
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
'season': 'Season 5',
|
||||
'season_number': 5,
|
||||
'series': 'Goldrausch in Australien',
|
||||
'duration': 2648.0,
|
||||
'upload_date': '20230517',
|
||||
'timestamp': 1684357500,
|
||||
'creators': ['DMAX'],
|
||||
'thumbnail': 'https://eu1-prod-images.disco-api.com/2023/05/09/f72fb510-7992-3b12-af7f-f16a2c22d1e3.jpeg',
|
||||
'tags': ['schatzsucher', 'schatz', 'nugget', 'bodenschätze', 'down under', 'australien', 'goldrausch'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100',
|
||||
'info_dict': {
|
||||
'id': '78867',
|
||||
@@ -901,9 +920,7 @@ class DiscoveryNetworksDeIE(DPlayBaseIE):
|
||||
'season_number': 1,
|
||||
'thumbnail': r're:https://.+\.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
'url': 'https://www.dmax.de/programme/dmax-highlights/video/tuning-star-sidney-hoffmann-exklusiv-bei-dmax/191023082312316',
|
||||
'only_matching': True,
|
||||
@@ -920,8 +937,14 @@ class DiscoveryNetworksDeIE(DPlayBaseIE):
|
||||
country = 'GB' if domain == 'dplay.co.uk' else 'DE'
|
||||
realm = 'questuk' if country == 'GB' else domain.replace('.', '')
|
||||
return self._get_disco_api_info(
|
||||
url, '%s/%s' % (programme, alternate_id),
|
||||
'sonic-eu1-prod.disco-api.com', realm, country)
|
||||
url, f'{programme}/{alternate_id}', 'eu1-prod.disco-api.com', realm, country)
|
||||
|
||||
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
||||
headers.update({
|
||||
'x-disco-params': f'realm={realm}',
|
||||
'x-disco-client': 'Alps:HyogaPlayer:0.0.0',
|
||||
'Authorization': self._get_auth(disco_base, display_id, realm),
|
||||
})
|
||||
|
||||
|
||||
class DiscoveryPlusShowBaseIE(DPlayBaseIE):
|
||||
|
||||
@@ -2,8 +2,8 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
NO_DEFAULT,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
@@ -5,9 +5,9 @@ import urllib.parse
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
get_elements_by_class,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
|
||||
@@ -2,15 +2,15 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
from ..compat import compat_urlparse
|
||||
|
||||
|
||||
class DWIE(InfoExtractor):
|
||||
|
||||
@@ -1,105 +0,0 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
get_elements_by_class,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class EinthusanIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?P<host>einthusan\.(?:tv|com|ca))/movie/watch/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://einthusan.tv/movie/watch/9097/',
|
||||
'md5': 'ff0f7f2065031b8a2cf13a933731c035',
|
||||
'info_dict': {
|
||||
'id': '9097',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ae Dil Hai Mushkil',
|
||||
'description': 'md5:33ef934c82a671a94652a9b4e54d931b',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://einthusan.tv/movie/watch/51MZ/?lang=hindi',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://einthusan.com/movie/watch/9097/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://einthusan.ca/movie/watch/4E9n/?lang=hindi',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
# reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js
|
||||
def _decrypt(self, encrypted_data, video_id):
|
||||
return self._parse_json(compat_b64decode((
|
||||
encrypted_data[:10] + encrypted_data[-1] + encrypted_data[12:-1]
|
||||
)).decode('utf-8'), video_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
host = mobj.group('host')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(r'<h3>([^<]+)</h3>', webpage, 'title')
|
||||
|
||||
player_params = extract_attributes(self._search_regex(
|
||||
r'(<section[^>]+id="UIVideoPlayer"[^>]+>)', webpage, 'player parameters'))
|
||||
|
||||
page_id = self._html_search_regex(
|
||||
'<html[^>]+data-pageid="([^"]+)"', webpage, 'page ID')
|
||||
video_data = self._download_json(
|
||||
'https://%s/ajax/movie/watch/%s/' % (host, video_id), video_id,
|
||||
data=urlencode_postdata({
|
||||
'xEvent': 'UIVideoPlayer.PingOutcome',
|
||||
'xJson': json.dumps({
|
||||
'EJOutcomes': player_params['data-ejpingables'],
|
||||
'NativeHLS': False
|
||||
}),
|
||||
'arcVersion': 3,
|
||||
'appVersion': 59,
|
||||
'gorilla.csrf.Token': page_id,
|
||||
}))['Data']
|
||||
|
||||
if isinstance(video_data, compat_str) and video_data.startswith('/ratelimited/'):
|
||||
raise ExtractorError(
|
||||
'Download rate reached. Please try again later.', expected=True)
|
||||
|
||||
ej_links = self._decrypt(video_data['EJLinks'], video_id)
|
||||
|
||||
formats = []
|
||||
|
||||
m3u8_url = ej_links.get('HLSLink')
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native'))
|
||||
|
||||
mp4_url = ej_links.get('MP4Link')
|
||||
if mp4_url:
|
||||
formats.append({
|
||||
'url': mp4_url,
|
||||
})
|
||||
|
||||
description = get_elements_by_class('synopsis', webpage)[0]
|
||||
thumbnail = self._html_search_regex(
|
||||
r'''<img[^>]+src=(["'])(?P<url>(?!\1).+?/moviecovers/(?!\1).+?)\1''',
|
||||
webpage, 'thumbnail url', fatal=False, group='url')
|
||||
if thumbnail is not None:
|
||||
thumbnail = compat_urlparse.urljoin(url, thumbnail)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
}
|
||||
@@ -16,13 +16,31 @@ class EplusIbIE(InfoExtractor):
|
||||
_VALID_URL = [r'https?://live\.eplus\.jp/ex/player\?ib=(?P<id>(?:\w|%2B|%2F){86}%3D%3D)',
|
||||
r'https?://live\.eplus\.jp/(?P<id>sample|\d+)']
|
||||
_TESTS = [{
|
||||
'url': 'https://live.eplus.jp/ex/player?ib=YEFxb3Vyc2Dombnjg7blkrLlrablnJLjgrnjgq%2Fjg7zjg6vjgqLjgqTjg4njg6vlkIzlpb3kvJpgTGllbGxhIQ%3D%3D',
|
||||
'url': 'https://live.eplus.jp/ex/player?ib=41K6Wzbr3PlcMD%2FOKHFlC%2FcZCe2Eaw7FK%2BpJS1ooUHki8d0vGSy2mYqxillQBe1dSnOxU%2B8%2FzXKls4XPBSb3vw%3D%3D',
|
||||
'info_dict': {
|
||||
'id': '354502-0001-002',
|
||||
'title': 'LoveLive!Series Presents COUNTDOWN LoveLive! 2021→2022~LIVE with a smile!~【Streaming+(配信)】',
|
||||
'id': '335699-0001-006',
|
||||
'title': '少女☆歌劇 レヴュースタァライト -The LIVE 青嵐- BLUE GLITTER <定点映像配信>【Streaming+(配信)】',
|
||||
'live_status': 'was_live',
|
||||
'release_date': '20211231',
|
||||
'release_timestamp': 1640952000,
|
||||
'release_date': '20201221',
|
||||
'release_timestamp': 1608544800,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'ignore_no_formats_error': True,
|
||||
},
|
||||
'expected_warnings': [
|
||||
'This event may not be accessible',
|
||||
'No video formats found',
|
||||
'Requested format is not available',
|
||||
],
|
||||
}, {
|
||||
'url': 'https://live.eplus.jp/ex/player?ib=6QSsQdyRAwOFZrEHWlhRm7vocgV%2FO0YzBZ%2BaBEBg1XR%2FmbLn0R%2F048dUoAY038%2F%2F92MJ73BsoAtvUpbV6RLtDQ%3D%3D&show_id=2371511',
|
||||
'info_dict': {
|
||||
'id': '348021-0054-001',
|
||||
'title': 'ラブライブ!スーパースター!! Liella! First LoveLive! Tour ~Starlines~【東京/DAY.1】',
|
||||
'live_status': 'was_live',
|
||||
'release_date': '20220115',
|
||||
'release_timestamp': 1642233600,
|
||||
'description': str,
|
||||
},
|
||||
'params': {
|
||||
@@ -124,6 +142,10 @@ class EplusIbIE(InfoExtractor):
|
||||
if data_json.get('drm_mode') == 'ON':
|
||||
self.report_drm(video_id)
|
||||
|
||||
if data_json.get('is_pass_ticket') == 'YES':
|
||||
raise ExtractorError(
|
||||
'This URL is for a pass ticket instead of a player page', expected=True)
|
||||
|
||||
delivery_status = data_json.get('delivery_status')
|
||||
archive_mode = data_json.get('archive_mode')
|
||||
release_timestamp = try_call(lambda: unified_timestamp(data_json['event_datetime']) - 32400)
|
||||
|
||||
@@ -4,15 +4,15 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
dict_get,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_qs,
|
||||
parse_age_limit,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
str_or_none,
|
||||
try_get,
|
||||
url_or_none,
|
||||
|
||||
@@ -8,7 +8,7 @@ from ..utils import (
|
||||
qualities,
|
||||
traverse_obj,
|
||||
unified_strdate,
|
||||
xpath_text
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
@@ -94,13 +94,14 @@ class EuropaIE(InfoExtractor):
|
||||
|
||||
class EuroParlWebstreamIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://multimedia\.europarl\.europa\.eu/[^/#?]+/
|
||||
(?:(?!video)[^/#?]+/[\w-]+_)(?P<id>[\w-]+)
|
||||
https?://multimedia\.europarl\.europa\.eu/
|
||||
(?:\w+/)?webstreaming/(?:[\w-]+_)?(?P<id>[\w-]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://multimedia.europarl.europa.eu/pl/webstreaming/plenary-session_20220914-0900-PLENARY',
|
||||
'info_dict': {
|
||||
'id': '62388b15-d85b-4add-99aa-ba12ccf64f0d',
|
||||
'display_id': '20220914-0900-PLENARY',
|
||||
'ext': 'mp4',
|
||||
'title': 'Plenary session',
|
||||
'release_timestamp': 1663139069,
|
||||
@@ -125,6 +126,7 @@ class EuroParlWebstreamIE(InfoExtractor):
|
||||
'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/committee-on-culture-and-education_20230301-1130-COMMITTEE-CULT',
|
||||
'info_dict': {
|
||||
'id': '7355662c-8eac-445e-4bb9-08db14b0ddd7',
|
||||
'display_id': '20230301-1130-COMMITTEE-CULT',
|
||||
'ext': 'mp4',
|
||||
'release_date': '20230301',
|
||||
'title': 'Committee on Culture and Education',
|
||||
@@ -142,6 +144,19 @@ class EuroParlWebstreamIE(InfoExtractor):
|
||||
'live_status': 'is_live',
|
||||
},
|
||||
'skip': 'Not live anymore'
|
||||
}, {
|
||||
'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/20240320-1345-SPECIAL-PRESSER',
|
||||
'info_dict': {
|
||||
'id': 'c1f11567-5b52-470a-f3e1-08dc3c216ace',
|
||||
'display_id': '20240320-1345-SPECIAL-PRESSER',
|
||||
'ext': 'mp4',
|
||||
'release_date': '20240320',
|
||||
'title': 'md5:7c6c814cac55dea5e2d87bf8d3db2234',
|
||||
'release_timestamp': 1710939767,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://multimedia.europarl.europa.eu/webstreaming/briefing-for-media-on-2024-european-elections_20240429-1000-SPECIAL-OTHER',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -166,6 +181,7 @@ class EuroParlWebstreamIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': json_info['id'],
|
||||
'display_id': display_id,
|
||||
'title': traverse_obj(webpage_nextjs, (('mediaItem', 'title'), ('title', )), get_all=False),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
js_to_json,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
parse_duration,
|
||||
ExtractorError,
|
||||
parse_duration,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,12 +1,6 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
ExtractorError,
|
||||
try_get,
|
||||
mimetype2ext
|
||||
)
|
||||
from ..utils import ExtractorError, mimetype2ext, parse_iso8601, try_get
|
||||
|
||||
|
||||
class FancodeVodIE(InfoExtractor):
|
||||
|
||||
@@ -3,9 +3,9 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_etree_fromstring
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
|
||||
@@ -2,10 +2,10 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
qualities,
|
||||
strip_or_none,
|
||||
int_or_none,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ from ..utils import (
|
||||
parse_codecs,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
unified_timestamp
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
try_get
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
|
||||
107
yt_dlp/extractor/gbnews.py
Normal file
107
yt_dlp/extractor/gbnews.py
Normal file
@@ -0,0 +1,107 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
get_elements_html_by_class,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class GBNewsIE(InfoExtractor):
|
||||
IE_DESC = 'GB News clips, features and live streams'
|
||||
_VALID_URL = r'https?://(?:www\.)?gbnews\.(?:uk|com)/(?:\w+/)?(?P<id>[^#?]+)'
|
||||
|
||||
_PLATFORM = 'safari'
|
||||
_SSMP_URL = 'https://mm-v2.simplestream.com/ssmp/api.php'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.gbnews.com/news/bbc-claudine-gay-harvard-university-antisemitism-row',
|
||||
'info_dict': {
|
||||
'id': '52264136',
|
||||
'ext': 'mp4',
|
||||
'thumbnail': r're:https?://www\.gbnews\.\w+/.+\.(?:jpe?g|png|webp)',
|
||||
'display_id': 'bbc-claudine-gay-harvard-university-antisemitism-row',
|
||||
'description': 'The post was criticised by former employers of the broadcaster',
|
||||
'title': 'BBC deletes post after furious backlash over headline downplaying antisemitism',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.gbnews.com/royal/prince-harry-in-love-with-kate-meghan-markle-jealous-royal',
|
||||
'info_dict': {
|
||||
'id': '52328390',
|
||||
'ext': 'mp4',
|
||||
'thumbnail': r're:https?://www\.gbnews\.\w+/.+\.(?:jpe?g|png|webp)',
|
||||
'display_id': 'prince-harry-in-love-with-kate-meghan-markle-jealous-royal',
|
||||
'description': 'Ingrid Seward has published 17 books documenting the highs and lows of the Royal Family',
|
||||
'title': 'Royal author claims Prince Harry was \'in love\' with Kate - Meghan was \'jealous\'',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.gbnews.uk/watchlive',
|
||||
'info_dict': {
|
||||
'id': '1069',
|
||||
'ext': 'mp4',
|
||||
'thumbnail': r're:https?://www\.gbnews\.\w+/.+\.(?:jpe?g|png|webp)',
|
||||
'display_id': 'watchlive',
|
||||
'live_status': 'is_live',
|
||||
'title': r're:^GB News Live',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
@functools.lru_cache
|
||||
def _get_ss_endpoint(self, data_id, data_env):
|
||||
if not data_id:
|
||||
data_id = 'GB003'
|
||||
if not data_env:
|
||||
data_env = 'production'
|
||||
|
||||
json_data = self._download_json(
|
||||
self._SSMP_URL, None, 'Downloading Simplestream JSON metadata', query={
|
||||
'id': data_id,
|
||||
'env': data_env,
|
||||
})
|
||||
meta_url = traverse_obj(json_data, ('response', 'api_hostname', {url_or_none}))
|
||||
if not meta_url:
|
||||
raise ExtractorError('No API host found')
|
||||
|
||||
return meta_url
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url).rpartition('/')[2]
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_data = None
|
||||
elements = get_elements_html_by_class('simplestream', webpage)
|
||||
for html_tag in elements:
|
||||
attributes = extract_attributes(html_tag)
|
||||
if 'sidebar' not in (attributes.get('class') or ''):
|
||||
video_data = attributes
|
||||
if not video_data:
|
||||
raise ExtractorError('Could not find video element', expected=True)
|
||||
|
||||
endpoint_url = self._get_ss_endpoint(video_data.get('data-id'), video_data.get('data-env'))
|
||||
|
||||
uvid = video_data['data-uvid']
|
||||
video_type = video_data.get('data-type')
|
||||
if not video_type or video_type == 'vod':
|
||||
video_type = 'show'
|
||||
stream_data = self._download_json(
|
||||
f'{endpoint_url}/api/{video_type}/stream/{uvid}',
|
||||
uvid, 'Downloading stream JSON', query={
|
||||
'key': video_data.get('data-key'),
|
||||
'platform': self._PLATFORM,
|
||||
})
|
||||
if traverse_obj(stream_data, 'drm'):
|
||||
self.report_drm(uvid)
|
||||
|
||||
return {
|
||||
'id': uvid,
|
||||
'display_id': display_id,
|
||||
'title': self._og_search_title(webpage, default=None),
|
||||
'description': self._og_search_description(webpage, default=None),
|
||||
'formats': self._extract_m3u8_formats(traverse_obj(stream_data, (
|
||||
'response', 'stream', {url_or_none})), uvid, 'mp4'),
|
||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||
'is_live': video_type == 'live',
|
||||
}
|
||||
@@ -4,7 +4,7 @@ import types
|
||||
import urllib.parse
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor # isort: split
|
||||
from .common import InfoExtractor
|
||||
from .commonprotocols import RtmpIE
|
||||
from .youtube import YoutubeIE
|
||||
from ..compat import compat_etree_fromstring
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
bool_or_none,
|
||||
ExtractorError,
|
||||
bool_or_none,
|
||||
dict_get,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
urlencode_postdata,
|
||||
|
||||
@@ -3,9 +3,9 @@ import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_qs,
|
||||
smuggle_url,
|
||||
|
||||
@@ -3,16 +3,16 @@ import re
|
||||
from .adobepass import AdobePassIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
determine_ext,
|
||||
parse_age_limit,
|
||||
remove_start,
|
||||
remove_end,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
ExtractorError,
|
||||
unified_timestamp,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
remove_end,
|
||||
remove_start,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
|
||||
79
yt_dlp/extractor/godresource.py
Normal file
79
yt_dlp/extractor/godresource.py
Normal file
@@ -0,0 +1,79 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
str_or_none,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class GodResourceIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://new\.godresource\.com/video/(?P<id>\w+)'
|
||||
_TESTS = [{
|
||||
# hls stream
|
||||
'url': 'https://new.godresource.com/video/A01mTKjyf6w',
|
||||
'info_dict': {
|
||||
'id': 'A01mTKjyf6w',
|
||||
'ext': 'mp4',
|
||||
'view_count': int,
|
||||
'timestamp': 1710978666,
|
||||
'channel_id': '5',
|
||||
'thumbnail': 'https://cdn-02.godresource.com/e42968ac-9e8b-4231-ab86-f4f9d775841f/thumbnail.jpg',
|
||||
'channel': 'Stedfast Baptist Church',
|
||||
'upload_date': '20240320',
|
||||
'title': 'GodResource video #A01mTKjyf6w',
|
||||
}
|
||||
}, {
|
||||
# mp4 link
|
||||
'url': 'https://new.godresource.com/video/01DXmBbQv_X',
|
||||
'md5': '0e8f72aa89a106b9d5c011ba6f8717b7',
|
||||
'info_dict': {
|
||||
'id': '01DXmBbQv_X',
|
||||
'ext': 'mp4',
|
||||
'channel_id': '12',
|
||||
'view_count': int,
|
||||
'timestamp': 1687996800,
|
||||
'thumbnail': 'https://cdn-02.godresource.com/sodomitedeception/thumbnail.jpg',
|
||||
'channel': 'Documentaries',
|
||||
'title': 'The Sodomite Deception',
|
||||
'upload_date': '20230629',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
api_data = self._download_json(
|
||||
f'https://api.godresource.com/api/Streams/{display_id}', display_id)
|
||||
|
||||
video_url = api_data['streamUrl']
|
||||
is_live = api_data.get('isLive') or False
|
||||
if (ext := determine_ext(video_url)) == 'm3u8':
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
video_url, display_id, live=is_live)
|
||||
elif ext == 'mp4':
|
||||
formats, subtitles = [{
|
||||
'url': video_url,
|
||||
'ext': ext
|
||||
}], {}
|
||||
else:
|
||||
raise ExtractorError(f'Unexpected video format {ext}')
|
||||
|
||||
return {
|
||||
'id': display_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'title': '',
|
||||
'is_live': is_live,
|
||||
**traverse_obj(api_data, {
|
||||
'title': ('title', {str}),
|
||||
'thumbnail': ('thumbnail', {url_or_none}),
|
||||
'view_count': ('views', {int}),
|
||||
'channel': ('channelName', {str}),
|
||||
'channel_id': ('channelId', {str_or_none}),
|
||||
'timestamp': ('streamDateCreated', {unified_timestamp}),
|
||||
'modified_timestamp': ('streamDataModified', {unified_timestamp})
|
||||
})
|
||||
}
|
||||
@@ -1,10 +1,7 @@
|
||||
import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
try_get
|
||||
)
|
||||
from ..utils import ExtractorError, try_get
|
||||
|
||||
|
||||
class GofileIE(InfoExtractor):
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..compat import compat_parse_qs
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
bug_reports_message,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
get_element_by_class,
|
||||
@@ -38,6 +40,17 @@ class GoogleDriveIE(InfoExtractor):
|
||||
'duration': 45,
|
||||
'thumbnail': 'https://drive.google.com/thumbnail?id=0ByeS4oOUV-49Zzh4R1J6R09zazQ',
|
||||
}
|
||||
}, {
|
||||
# has itag 50 which is not in YoutubeIE._formats (royalty Free music from 1922)
|
||||
'url': 'https://drive.google.com/uc?id=1IP0o8dHcQrIHGgVyp0Ofvx2cGfLzyO1x',
|
||||
'md5': '322db8d63dd19788c04050a4bba67073',
|
||||
'info_dict': {
|
||||
'id': '1IP0o8dHcQrIHGgVyp0Ofvx2cGfLzyO1x',
|
||||
'ext': 'mp3',
|
||||
'title': 'My Buddy - Henry Burr - Gus Kahn - Walter Donaldson.mp3',
|
||||
'duration': 184,
|
||||
'thumbnail': 'https://drive.google.com/thumbnail?id=1IP0o8dHcQrIHGgVyp0Ofvx2cGfLzyO1x',
|
||||
},
|
||||
}, {
|
||||
# video can't be watched anonymously due to view count limit reached,
|
||||
# but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046)
|
||||
@@ -58,22 +71,8 @@ class GoogleDriveIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
_FORMATS_EXT = {
|
||||
'5': 'flv',
|
||||
'6': 'flv',
|
||||
'13': '3gp',
|
||||
'17': '3gp',
|
||||
'18': 'mp4',
|
||||
'22': 'mp4',
|
||||
'34': 'flv',
|
||||
'35': 'flv',
|
||||
'36': '3gp',
|
||||
'37': 'mp4',
|
||||
'38': 'mp4',
|
||||
'43': 'webm',
|
||||
'44': 'webm',
|
||||
'45': 'webm',
|
||||
'46': 'webm',
|
||||
'59': 'mp4',
|
||||
**{k: v['ext'] for k, v in YoutubeIE._formats.items() if v.get('ext')},
|
||||
'50': 'm4a',
|
||||
}
|
||||
_BASE_URL_CAPTIONS = 'https://drive.google.com/timedtext'
|
||||
_CAPTIONS_ENTRY_TAG = {
|
||||
@@ -194,10 +193,13 @@ class GoogleDriveIE(InfoExtractor):
|
||||
if len(fmt_stream_split) < 2:
|
||||
continue
|
||||
format_id, format_url = fmt_stream_split[:2]
|
||||
ext = self._FORMATS_EXT.get(format_id)
|
||||
if not ext:
|
||||
self.report_warning(f'Unknown format {format_id}{bug_reports_message()}')
|
||||
f = {
|
||||
'url': lowercase_escape(format_url),
|
||||
'format_id': format_id,
|
||||
'ext': self._FORMATS_EXT[format_id],
|
||||
'ext': ext,
|
||||
}
|
||||
resolution = resolutions.get(format_id)
|
||||
if resolution:
|
||||
|
||||
@@ -1,11 +1,8 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
try_get,
|
||||
url_or_none
|
||||
)
|
||||
|
||||
import json
|
||||
from ..utils import try_get, url_or_none
|
||||
|
||||
|
||||
class GoToStageIE(InfoExtractor):
|
||||
|
||||
@@ -2,11 +2,11 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
xpath_element,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
urljoin,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,19 +1,20 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
KNOWN_EXTENSIONS,
|
||||
determine_ext,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class HearThisAtIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?hearthis\.at/(?P<artist>[^/]+)/(?P<title>[A-Za-z0-9\-]+)/?$'
|
||||
_VALID_URL = r'https?://(?:www\.)?hearthis\.at/(?P<artist>[^/?#]+)/(?P<title>[\w.-]+)'
|
||||
_PLAYLIST_URL = 'https://hearthis.at/playlist.php'
|
||||
_TESTS = [{
|
||||
'url': 'https://hearthis.at/moofi/dr-kreep',
|
||||
'md5': 'ab6ec33c8fed6556029337c7885eb4e0',
|
||||
'info_dict': {
|
||||
'id': '150939',
|
||||
'display_id': 'moofi - dr-kreep',
|
||||
'ext': 'wav',
|
||||
'title': 'Moofi - Dr. Kreep',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
@@ -21,15 +22,16 @@ class HearThisAtIE(InfoExtractor):
|
||||
'description': 'md5:1adb0667b01499f9d27e97ddfd53852a',
|
||||
'upload_date': '20150118',
|
||||
'view_count': int,
|
||||
'duration': 71,
|
||||
'genre': 'Experimental',
|
||||
}
|
||||
'duration': 70,
|
||||
'genres': ['Experimental'],
|
||||
},
|
||||
}, {
|
||||
# 'download' link redirects to the original webpage
|
||||
'url': 'https://hearthis.at/twitchsf/dj-jim-hopkins-totally-bitchin-80s-dance-mix/',
|
||||
'md5': '5980ceb7c461605d30f1f039df160c6e',
|
||||
'info_dict': {
|
||||
'id': '811296',
|
||||
'display_id': 'twitchsf - dj-jim-hopkins-totally-bitchin-80s-dance-mix',
|
||||
'ext': 'mp3',
|
||||
'title': 'TwitchSF - DJ Jim Hopkins - Totally Bitchin\' 80\'s Dance Mix!',
|
||||
'description': 'md5:ef26815ca8f483272a87b137ff175be2',
|
||||
@@ -38,7 +40,39 @@ class HearThisAtIE(InfoExtractor):
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'view_count': int,
|
||||
'duration': 4360,
|
||||
'genre': 'Dance',
|
||||
'genres': ['Dance'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://hearthis.at/tindalos/0001-tindalos-gnrique/eQd/',
|
||||
'md5': 'cd08e51911f147f6da2d9678905b0bd9',
|
||||
'info_dict': {
|
||||
'id': '2685222',
|
||||
'ext': 'mp3',
|
||||
'duration': 86,
|
||||
'view_count': int,
|
||||
'timestamp': 1545471670,
|
||||
'display_id': 'tindalos - 0001-tindalos-gnrique',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'genres': ['Other'],
|
||||
'title': 'Tindalos - Tindalos - générique n°1',
|
||||
'description': '',
|
||||
'upload_date': '20181222',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://hearthis.at/sithi2/biochip-c-classics-set-wolle-xdp-tresor.core-special-tresor-globus-berlin-13.07.20011/',
|
||||
'md5': 'b45ac60f0c8111eef6ddc10ec232e312',
|
||||
'info_dict': {
|
||||
'id': '7145959',
|
||||
'ext': 'mp3',
|
||||
'description': 'md5:d7ae36a453d78903f6b7ed6eb2fce1f2',
|
||||
'duration': 8986,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'title': 'md5:62669ce5b1b67f45c6f846033f37d3b9',
|
||||
'timestamp': 1588699409,
|
||||
'display_id': 'sithi2 - biochip-c-classics-set-wolle-xdp-tresor.core-special-tresor-globus-berlin-13.07.20011',
|
||||
'view_count': int,
|
||||
'upload_date': '20200505',
|
||||
'genres': ['Other'],
|
||||
},
|
||||
}]
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_count,
|
||||
|
||||
@@ -4,8 +4,8 @@ from .common import InfoExtractor
|
||||
from ..networking import Request
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
try_get,
|
||||
|
||||
@@ -2,8 +2,8 @@ import hashlib
|
||||
import random
|
||||
import re
|
||||
|
||||
from ..compat import compat_urlparse, compat_b64decode
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_b64decode, compat_urlparse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
@@ -13,8 +13,6 @@ from ..utils import (
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class HuyaLiveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?P<id>[^/#?&]+)(?:\D|$)'
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
import re
|
||||
|
||||
from .cloudflarestream import CloudflareStreamIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import traverse_obj
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class HytaleIE(InfoExtractor):
|
||||
@@ -49,7 +50,7 @@ class HytaleIE(InfoExtractor):
|
||||
entries = [
|
||||
self.url_result(
|
||||
f'https://cloudflarestream.com/{video_hash}/manifest/video.mpd?parentOrigin=https%3A%2F%2Fhytale.com',
|
||||
title=self._titles.get(video_hash), url_transparent=True)
|
||||
CloudflareStreamIE, title=self._titles.get(video_hash), url_transparent=True)
|
||||
for video_hash in re.findall(
|
||||
r'<stream\s+class\s*=\s*"ql-video\s+cf-stream"\s+src\s*=\s*"([a-f0-9]{32})"',
|
||||
webpage)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError, str_or_none, traverse_obj, unified_strdate
|
||||
from ..compat import compat_str
|
||||
from ..utils import ExtractorError, str_or_none, traverse_obj, unified_strdate
|
||||
|
||||
|
||||
class IchinanaLiveIE(InfoExtractor):
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
from .bokecc import BokeCCBaseIE
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_urllib_parse_unquote,
|
||||
@@ -6,10 +7,9 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
update_url_query,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
)
|
||||
from .bokecc import BokeCCBaseIE
|
||||
|
||||
|
||||
class InfoQIE(BokeCCBaseIE):
|
||||
|
||||
@@ -255,7 +255,7 @@ class InstagramIOSIE(InfoExtractor):
|
||||
|
||||
|
||||
class InstagramIE(InstagramBaseIE):
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com(?:/[^/]+)?/(?:p|tv|reel)/(?P<id>[^/?#&]+))'
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com(?:/[^/]+)?/(?:p|tv|reels?(?!/audio/))/(?P<id>[^/?#&]+))'
|
||||
_EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?instagram\.com/p/[^/]+/embed.*?)\1']
|
||||
_TESTS = [{
|
||||
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
|
||||
@@ -379,6 +379,9 @@ class InstagramIE(InstagramBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.instagram.com/marvelskies.fc/reel/CWqAgUZgCku/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.instagram.com/reels/Cop84x6u7CP/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
|
||||
@@ -3,12 +3,12 @@ import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
js_to_json,
|
||||
urlencode_postdata,
|
||||
ExtractorError,
|
||||
parse_qs,
|
||||
traverse_obj
|
||||
traverse_obj,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -4,20 +4,16 @@ import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urllib_parse_unquote
|
||||
)
|
||||
from .openload import PhantomJSwrapper
|
||||
from ..compat import compat_str, compat_urllib_parse_unquote, compat_urllib_parse_urlencode
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
decode_packed_codes,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
format_field,
|
||||
get_element_by_id,
|
||||
get_element_by_attribute,
|
||||
get_element_by_id,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
ohdave_rsa_encrypt,
|
||||
|
||||
@@ -1,12 +1,11 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
urljoin
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user