mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-02-17 14:05:48 +00:00
Merge branch 'yt-dlp:master' into pr/live-sections
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -6,10 +6,10 @@ import time
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
js_to_json,
|
||||
dict_get,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
|
||||
@@ -12,20 +12,21 @@ import urllib.parse
|
||||
import urllib.request
|
||||
import urllib.response
|
||||
import uuid
|
||||
from ..utils.networking import clean_proxies
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_ecb_decrypt
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
bytes_to_intlist,
|
||||
decode_base_n,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
OnDemandPagedList,
|
||||
time_seconds,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
)
|
||||
from ..utils.networking import clean_proxies
|
||||
|
||||
|
||||
def add_opener(ydl, handler): # FIXME: Create proper API in .networking
|
||||
|
||||
@@ -3,10 +3,10 @@ from ..utils import (
|
||||
float_or_none,
|
||||
format_field,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
parse_codecs,
|
||||
parse_qs,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -10,18 +10,18 @@ from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
|
||||
from ..compat import compat_b64decode
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
ass_subtitles_timecode,
|
||||
bytes_to_intlist,
|
||||
bytes_to_long,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
long_to_bytes,
|
||||
parse_iso8601,
|
||||
pkcs1pad,
|
||||
strip_or_none,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
|
||||
@@ -4,11 +4,11 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ISO639Utils,
|
||||
OnDemandPagedList,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
ISO639Utils,
|
||||
join_nonempty,
|
||||
OnDemandPagedList,
|
||||
parse_duration,
|
||||
str_or_none,
|
||||
str_to_int,
|
||||
|
||||
@@ -5,7 +5,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
traverse_obj
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -12,7 +12,6 @@ from ..utils import (
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
_FIELDS = '''
|
||||
_id
|
||||
clipImageSource
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_filesize,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,17 +1,13 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
int_or_none,
|
||||
clean_html,
|
||||
ExtractorError
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from .vimeo import VimeoIE
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
|
||||
@@ -5,7 +5,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
unified_timestamp
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import url_or_none, merge_dicts
|
||||
from ..utils import merge_dicts, url_or_none
|
||||
|
||||
|
||||
class AngelIE(InfoExtractor):
|
||||
|
||||
@@ -1,8 +1,5 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
str_to_int,
|
||||
ExtractorError
|
||||
)
|
||||
from ..utils import ExtractorError, str_to_int
|
||||
|
||||
|
||||
class AppleConnectIE(InfoExtractor):
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import re
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
|
||||
@@ -4,8 +4,8 @@ from ..compat import (
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
format_field,
|
||||
float_or_none,
|
||||
format_field,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
remove_start,
|
||||
|
||||
@@ -2,10 +2,10 @@ import datetime as dt
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
jwt_encode_hs256,
|
||||
try_get,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -2,8 +2,8 @@ import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_str,
|
||||
compat_urllib_parse_urlencode,
|
||||
)
|
||||
from ..utils import (
|
||||
format_field,
|
||||
|
||||
@@ -2,12 +2,12 @@ import math
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
format_field,
|
||||
InAdvancePagedList,
|
||||
format_field,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
@@ -2,11 +2,11 @@ import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
try_get,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from .common import InfoExtractor
|
||||
from .amp import AMPIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
mimetype2ext,
|
||||
parse_duration,
|
||||
@@ -5,7 +6,6 @@ from ..utils import (
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
)
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class BloggerIE(InfoExtractor):
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
)
|
||||
|
||||
@@ -1,9 +1,5 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
traverse_obj,
|
||||
unified_timestamp
|
||||
)
|
||||
from ..utils import js_to_json, traverse_obj, unified_timestamp
|
||||
|
||||
|
||||
class BoxCastVideoIE(InfoExtractor):
|
||||
|
||||
@@ -6,7 +6,7 @@ from ..utils import (
|
||||
classproperty,
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
urljoin
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -12,10 +12,11 @@ from ..compat import (
|
||||
)
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
UnsupportedError,
|
||||
clean_html,
|
||||
dict_get,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
fix_xml_ampersands,
|
||||
float_or_none,
|
||||
@@ -29,7 +30,6 @@ from ..utils import (
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
UnsupportedError,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
@@ -27,8 +27,17 @@ class BrilliantpalaBaseIE(InfoExtractor):
|
||||
r'"username"\s*:\s*"(?P<username>[^"]+)"', webpage, 'logged-in username')
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
login_form = self._hidden_inputs(self._download_webpage(
|
||||
self._LOGIN_API, None, 'Downloading login page'))
|
||||
login_page, urlh = self._download_webpage_handle(
|
||||
self._LOGIN_API, None, 'Downloading login page', expected_status=401)
|
||||
if urlh.status != 401 and not urlh.url.startswith(self._LOGIN_API):
|
||||
self.write_debug('Cookies are valid, no login required.')
|
||||
return
|
||||
|
||||
if urlh.status == 401:
|
||||
self.write_debug('Got HTTP Error 401; cookies have been invalidated')
|
||||
login_page = self._download_webpage(self._LOGIN_API, None, 'Re-downloading login page')
|
||||
|
||||
login_form = self._hidden_inputs(login_page)
|
||||
login_form.update({
|
||||
'username': username,
|
||||
'password': password,
|
||||
|
||||
74
yt_dlp/extractor/caffeinetv.py
Normal file
74
yt_dlp/extractor/caffeinetv.py
Normal file
@@ -0,0 +1,74 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class CaffeineTVIE(InfoExtractor):
    # Extractor for individual videos published under a caffeine.tv user page
    _VALID_URL = r'https?://(?:www\.)?caffeine\.tv/[^/?#]+/video/(?P<id>[\da-f-]+)'
    _TESTS = [{
        'url': 'https://www.caffeine.tv/TsuSurf/video/cffc0a00-e73f-11ec-8080-80017d29f26e',
        'info_dict': {
            'id': 'cffc0a00-e73f-11ec-8080-80017d29f26e',
            'ext': 'mp4',
            'title': 'GOOOOD MORNINNNNN #highlights',
            'timestamp': 1654702180,
            'upload_date': '20220608',
            'uploader': 'RahJON Wicc',
            'uploader_id': 'TsuSurf',
            'duration': 3145,
            'age_limit': 17,
            'thumbnail': 'https://www.caffeine.tv/broadcasts/776b6f84-9cd5-42e3-af1d-4a776eeed697/replay/lobby.jpg',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'tags': ['highlights', 'battlerap'],
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }]

    def _real_extract(self, url):
        """Fetch the public activity JSON for the video in ``url`` and map it to an info dict."""
        video_id = self._match_id(url)
        activity = self._download_json(
            f'https://api.caffeine.tv/social/public/activity/{video_id}', video_id)
        broadcast = traverse_obj(activity, ('broadcast_info', {dict})) or {}

        # Hard lookup on purpose: without a stream URL there is nothing to extract
        stream_url = broadcast['video_url']
        if determine_ext(stream_url) != 'm3u8':
            formats = [{'url': stream_url}]
        else:
            formats = self._extract_m3u8_formats(stream_url, video_id, 'mp4')

        # Metadata that lives on the top-level activity object
        activity_fields = traverse_obj(activity, {
            'like_count': ('like_count', {int_or_none}),
            'view_count': ('view_count', {int_or_none}),
            'comment_count': ('comment_count', {int_or_none}),
            'tags': ('tags', ..., {str}, {lambda x: x or None}),
            'uploader': ('user', 'name', {str}),
            'uploader_id': (((None, 'user'), 'username'), {str}, any),
            'is_live': ('is_live', {bool}),
        })

        # Metadata that lives on the nested broadcast object
        broadcast_fields = traverse_obj(broadcast, {
            'title': ('broadcast_title', {str}),
            'duration': ('content_duration', {int_or_none}),
            'timestamp': ('broadcast_start_time', {parse_iso8601}),
            'thumbnail': ('preview_image_path', {lambda x: urljoin(url, x)}),
        })

        # assume Apple Store ratings: https://en.wikipedia.org/wiki/Mobile_software_content_rating_system
        rating_to_age = {
            'FOUR_PLUS': 0,
            'NINE_PLUS': 9,
            'TWELVE_PLUS': 12,
            'SEVENTEEN_PLUS': 17,
        }

        return {
            'id': video_id,
            'formats': formats,
            **activity_fields,
            **broadcast_fields,
            # Unknown or missing ratings fall back to the strictest limit
            'age_limit': rating_to_age.get(broadcast.get('content_rating'), 17),
        }
|
||||
@@ -5,14 +5,14 @@ from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
find_xpath_attr,
|
||||
get_element_html_by_id,
|
||||
int_or_none,
|
||||
find_xpath_attr,
|
||||
smuggle_url,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -101,7 +101,7 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize')
|
||||
playlist_title = self._og_search_title(webpage, default=None)
|
||||
if site_name and playlist_title:
|
||||
playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, 1)[0]
|
||||
playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, maxsplit=1)[0]
|
||||
playlist_description = self._og_search_description(webpage, default=None)
|
||||
if playlist_description:
|
||||
playlist_description = playlist_description.replace('\xa0', ' ')
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
)
|
||||
|
||||
import re
|
||||
|
||||
|
||||
class ClippitIE(InfoExtractor):
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import base64
|
||||
import collections
|
||||
import functools
|
||||
import getpass
|
||||
import hashlib
|
||||
import http.client
|
||||
@@ -21,7 +22,6 @@ import urllib.parse
|
||||
import urllib.request
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from ..compat import functools # isort: split
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_expanduser,
|
||||
@@ -2451,7 +2451,7 @@ class InfoExtractor:
|
||||
})
|
||||
continue
|
||||
|
||||
src_url = src if src.startswith('http') else urllib.parse.urljoin(base, src)
|
||||
src_url = src if src.startswith('http') else urllib.parse.urljoin(f'{base}/', src)
|
||||
src_url = src_url.strip()
|
||||
|
||||
if proto == 'm3u8' or src_ext == 'm3u8':
|
||||
@@ -3398,23 +3398,16 @@ class InfoExtractor:
|
||||
return formats
|
||||
|
||||
def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
|
||||
mobj = re.search(
|
||||
r'''(?s)jwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?!</script>).*?\.\s*setup\s*\(\s*(?P<options>(?:\([^)]*\)|[^)])+)\s*\)''',
|
||||
webpage)
|
||||
if mobj:
|
||||
try:
|
||||
jwplayer_data = self._parse_json(mobj.group('options'),
|
||||
video_id=video_id,
|
||||
transform_source=transform_source)
|
||||
except ExtractorError:
|
||||
pass
|
||||
else:
|
||||
if isinstance(jwplayer_data, dict):
|
||||
return jwplayer_data
|
||||
return self._search_json(
|
||||
r'''(?<!-)\bjwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?:(?!</script>).)*?\.\s*(?:setup\s*\(|(?P<load>load)\s*\(\s*\[)''',
|
||||
webpage, 'JWPlayer data', video_id,
|
||||
# must be a {...} or sequence, ending
|
||||
contains_pattern=r'\{(?s:.*)}(?(load)(?:\s*,\s*\{(?s:.*)})*)', end_pattern=r'(?(load)\]|\))',
|
||||
transform_source=transform_source, default=None)
|
||||
|
||||
def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
|
||||
def _extract_jwplayer_data(self, webpage, video_id, *args, transform_source=js_to_json, **kwargs):
|
||||
jwplayer_data = self._find_jwplayer_data(
|
||||
webpage, video_id, transform_source=js_to_json)
|
||||
webpage, video_id, transform_source=transform_source)
|
||||
return self._parse_jwplayer_data(
|
||||
jwplayer_data, video_id, *args, **kwargs)
|
||||
|
||||
@@ -3446,22 +3439,14 @@ class InfoExtractor:
|
||||
mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)
|
||||
|
||||
subtitles = {}
|
||||
tracks = video_data.get('tracks')
|
||||
if tracks and isinstance(tracks, list):
|
||||
for track in tracks:
|
||||
if not isinstance(track, dict):
|
||||
continue
|
||||
track_kind = track.get('kind')
|
||||
if not track_kind or not isinstance(track_kind, str):
|
||||
continue
|
||||
if track_kind.lower() not in ('captions', 'subtitles'):
|
||||
continue
|
||||
track_url = urljoin(base_url, track.get('file'))
|
||||
if not track_url:
|
||||
continue
|
||||
subtitles.setdefault(track.get('label') or 'en', []).append({
|
||||
'url': self._proto_relative_url(track_url)
|
||||
})
|
||||
for track in traverse_obj(video_data, (
|
||||
'tracks', lambda _, v: v['kind'].lower() in ('captions', 'subtitles'))):
|
||||
track_url = urljoin(base_url, track.get('file'))
|
||||
if not track_url:
|
||||
continue
|
||||
subtitles.setdefault(track.get('label') or 'en', []).append({
|
||||
'url': self._proto_relative_url(track_url)
|
||||
})
|
||||
|
||||
entry = {
|
||||
'id': this_video_id,
|
||||
@@ -3546,7 +3531,7 @@ class InfoExtractor:
|
||||
# See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
|
||||
# of jwplayer.flash.swf
|
||||
rtmp_url_parts = re.split(
|
||||
r'((?:mp4|mp3|flv):)', source_url, 1)
|
||||
r'((?:mp4|mp3|flv):)', source_url, maxsplit=1)
|
||||
if len(rtmp_url_parts) == 3:
|
||||
rtmp_url, prefix, play_path = rtmp_url_parts
|
||||
a_format.update({
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from .theplatform import ThePlatformFeedIE
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
dict_get,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
@@ -6,6 +6,7 @@ import time
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
@@ -13,7 +14,6 @@ from ..utils import (
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
url_or_none,
|
||||
ExtractorError
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .senategov import SenateISVPIE
|
||||
from .ustream import UstreamIE
|
||||
from ..compat import compat_HTMLParseError
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
find_xpath_attr,
|
||||
get_element_by_attribute,
|
||||
@@ -19,8 +21,6 @@ from ..utils import (
|
||||
str_to_int,
|
||||
unescapeHTML,
|
||||
)
|
||||
from .senategov import SenateISVPIE
|
||||
from .ustream import UstreamIE
|
||||
|
||||
|
||||
class CSpanIE(InfoExtractor):
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_timestamp
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import unified_timestamp
|
||||
|
||||
|
||||
class CtsNewsIE(InfoExtractor):
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
determine_protocol,
|
||||
int_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError, clean_html, int_or_none, try_get, unified_strdate
|
||||
from ..compat import compat_str
|
||||
from ..utils import ExtractorError, clean_html, int_or_none, try_get, unified_strdate
|
||||
|
||||
|
||||
class DamtomoBaseIE(InfoExtractor):
|
||||
|
||||
197
yt_dlp/extractor/dangalplay.py
Normal file
197
yt_dlp/extractor/dangalplay.py
Normal file
@@ -0,0 +1,197 @@
|
||||
import hashlib
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import ExtractorError, int_or_none, join_nonempty, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class DangalPlayBaseIE(InfoExtractor):
    # Shared login handling, API access and metadata mapping for the DangalPlay extractors
    _NETRC_MACHINE = 'dangalplay'
    # Set by _perform_login(); stays None until valid credentials were supplied
    _OTV_USER_ID = None
    _LOGIN_HINT = 'Pass credentials as -u "token" -p "USER_ID" where USER_ID is the `otv_user_id` in browser local storage'
    _API_BASE = 'https://ottapi.dangalplay.com'
    _AUTH_TOKEN = 'jqeGWxRKK7FK5zEk3xCM'  # from https://www.dangalplay.com/main.48ad19e24eb46acccef3.js
    _SECRET_KEY = 'f53d31a4377e4ef31fa0'  # same as above

    def _perform_login(self, username, password):
        """Store the user ID passed as ``password``; no network request is made.

        Raises an expected ExtractorError carrying the login hint unless the
        username is the literal 'token' and the password is 32 lowercase hex digits.
        """
        if self._OTV_USER_ID:
            return
        if username != 'token' or not re.fullmatch(r'[\da-f]{32}', password):
            raise ExtractorError(self._LOGIN_HINT, expected=True)
        self._OTV_USER_ID = password

    def _real_initialize(self):
        """Refuse to run any extraction when no user ID has been stored."""
        if not self._OTV_USER_ID:
            self.raise_login_required(f'Login required. {self._LOGIN_HINT}', method=None)

    def _extract_episode_info(self, metadata, episode_slug, series_slug):
        """Build the episode metadata dict from an API item plus the URL slugs.

        Episode and season numbers are parsed out of the slugs; the season
        number defaults to 1 when the series slug carries no 'season-N' part.
        """
        return {
            'display_id': episode_slug,
            'episode_number': int_or_none(self._search_regex(
                r'ep-(?:number-)?(\d+)', episode_slug, 'episode number', default=None)),
            'season_number': int_or_none(self._search_regex(
                r'season-(\d+)', series_slug, 'season number', default='1')),
            # Slug is the fallback; a 'series' value found in metadata below replaces it
            'series': series_slug,
            **traverse_obj(metadata, {
                'id': ('content_id', {str}),
                'title': ('display_title', {str}),
                'episode': ('title', {str}),
                'series': ('show_name', {str}, {lambda x: x or None}),
                'series_id': ('catalog_id', {str}),
                'duration': ('duration', {int_or_none}),
                'release_timestamp': ('release_date_uts', {int_or_none}),
            }),
        }

    def _call_api(self, path, display_id, note='Downloading JSON metadata', fatal=True, query=None):
        """Download JSON from ``path`` under the API base.

        ``auth_token`` and ``region`` are always sent; entries in ``query``
        (optional mapping) are merged on top and may override them.
        """
        return self._download_json(
            f'{self._API_BASE}/{path}', display_id, note, fatal=fatal,
            headers={'Accept': 'application/json'}, query={
                'auth_token': self._AUTH_TOKEN,
                'region': 'IN',
                # None instead of a shared mutable {} default
                **(query or {}),
            })
|
||||
|
||||
|
||||
class DangalPlayIE(DangalPlayBaseIE):
    # Extractor for a single episode page: /shows/<series>/<episode>
    IE_NAME = 'dangalplay'
    # The negative lookahead keeps '<series>/episodes' style paths out of this extractor
    _VALID_URL = r'https?://(?:www\.)?dangalplay\.com/shows/(?P<series>[^/?#]+)/(?P<id>(?!episodes)[^/?#]+)/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://www.dangalplay.com/shows/kitani-mohabbat-hai-season-2/kitani-mohabbat-hai-season-2-ep-number-01',
        'info_dict': {
            'id': '647c61dc1e7171310dcd49b4',
            'ext': 'mp4',
            'release_timestamp': 1262304000,
            'episode_number': 1,
            'episode': 'EP 1 | KITANI MOHABBAT HAI SEASON 2',
            'series': 'kitani-mohabbat-hai-season-2',
            'season_number': 2,
            'title': 'EP 1 | KITANI MOHABBAT HAI SEASON 2',
            'release_date': '20100101',
            'duration': 2325,
            'season': 'Season 2',
            'display_id': 'kitani-mohabbat-hai-season-2-ep-number-01',
            'series_id': '645c9ea41e717158ca574966',
        },
    }, {
        'url': 'https://www.dangalplay.com/shows/milke-bhi-hum-na-mile/milke-bhi-hum-na-mile-ep-number-01',
        'info_dict': {
            'id': '65d31d9ba73b9c3abd14a7f3',
            'ext': 'mp4',
            'episode': 'EP 1 | MILKE BHI HUM NA MILE',
            'release_timestamp': 1708367411,
            'episode_number': 1,
            'season': 'Season 1',
            'title': 'EP 1 | MILKE BHI HUM NA MILE',
            'duration': 156048,
            'release_date': '20240219',
            'season_number': 1,
            'series': 'MILKE BHI HUM NA MILE',
            'series_id': '645c9ea41e717158ca574966',
            'display_id': 'milke-bhi-hum-na-mile-ep-number-01',
        },
    }]

    def _generate_api_data(self, data):
        """Return the UTF-8 encoded JSON request body for the playback-details call.

        ``data`` must contain 'catalog_id' and 'content_id'. The 'md5' field is the
        hex digest of catalog ID + content ID + user ID + timestamp + secret key,
        concatenated in that order.
        """
        catalog_id = data['catalog_id']
        content_id = data['content_id']
        timestamp = str(int(time.time()))
        unhashed = ''.join((catalog_id, content_id, self._OTV_USER_ID, timestamp, self._SECRET_KEY))

        return json.dumps({
            'catalog_id': catalog_id,
            'content_id': content_id,
            'category': '',
            'region': 'IN',
            'auth_token': self._AUTH_TOKEN,
            'id': self._OTV_USER_ID,
            'md5': hashlib.md5(unhashed.encode()).hexdigest(),
            'ts': timestamp,
        }, separators=(',', ':')).encode()

    def _real_extract(self, url):
        """Resolve episode metadata, request playback details and extract HLS formats."""
        series_slug, episode_slug = self._match_valid_url(url).group('series', 'id')
        metadata = self._call_api(
            f'catalogs/shows/{series_slug}/episodes/{episode_slug}.gzip',
            episode_slug, query={'item_language': ''})['data']

        try:
            details = self._download_json(
                f'{self._API_BASE}/v2/users/get_all_details.gzip', episode_slug,
                'Downloading playback details JSON', headers={
                    'Accept': 'application/json',
                    'Content-Type': 'application/json',
                }, data=self._generate_api_data(metadata))['data']
        except ExtractorError as e:
            # HTTP 422 responses carry a JSON body describing the failure
            if isinstance(e.cause, HTTPError) and e.cause.status == 422:
                error_info = traverse_obj(e.cause.response.read().decode(), ({json.loads}, 'error', {dict})) or {}
                if error_info.get('code') == '1016':
                    self.raise_login_required(
                        f'Your token has expired or is invalid. {self._LOGIN_HINT}', method=None)
                elif msg := error_info.get('message'):
                    raise ExtractorError(msg)
            raise

        # Prefer the top-level adaptive URL, else the first valid HD HLS playback URL
        m3u8_url = traverse_obj(details, (
            ('adaptive_url', ('adaptive_urls', 'hd', 'hls', ..., 'playback_url')), {url_or_none}, any))
        if not m3u8_url:
            # Fail with a clear message instead of handing None to the m3u8 downloader
            raise ExtractorError('Unable to find a playback URL in the API response')
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, episode_slug, 'mp4')

        return {
            'formats': formats,
            'subtitles': subtitles,
            **self._extract_episode_info(metadata, episode_slug, series_slug),
        }
|
||||
|
||||
|
||||
class DangalPlaySeasonIE(DangalPlayBaseIE):
    # Playlist extractor for a season page, or for one episode range ("subcategory") of it
    IE_NAME = 'dangalplay:season'
    _VALID_URL = r'https?://(?:www\.)?dangalplay\.com/shows/(?P<id>[^/?#]+)(?:/(?P<sub>ep-[^/?#]+)/episodes)?/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://www.dangalplay.com/shows/kitani-mohabbat-hai-season-1',
        'playlist_mincount': 170,
        'info_dict': {
            'id': 'kitani-mohabbat-hai-season-1',
        },
    }, {
        'url': 'https://www.dangalplay.com/shows/kitani-mohabbat-hai-season-1/ep-01-30-1/episodes',
        'playlist_count': 30,
        'info_dict': {
            'id': 'kitani-mohabbat-hai-season-1-ep-01-30-1',
        },
    }, {
        # 1 season only, series page is season page
        'url': 'https://www.dangalplay.com/shows/milke-bhi-hum-na-mile',
        'playlist_mincount': 15,
        'info_dict': {
            'id': 'milke-bhi-hum-na-mile',
        },
    }]

    def _entries(self, subcategories, series_slug):
        """Yield a url_result for every episode listed under each subcategory.

        The listing request is non-fatal, so a subcategory whose download
        fails simply contributes no entries.
        """
        for subcategory in subcategories:
            data = self._call_api(
                f'catalogs/shows/items/{series_slug}/subcategories/{subcategory}/episodes.gzip',
                series_slug, f'Downloading episodes JSON for {subcategory}', fatal=False, query={
                    'order_by': 'asc',
                    'status': 'published',
                })
            # Only items carrying a truthy 'friendly_id' can be turned into an episode URL
            for ep in traverse_obj(data, ('data', 'items', lambda _, v: v['friendly_id'])):
                episode_slug = ep['friendly_id']
                yield self.url_result(
                    f'https://www.dangalplay.com/shows/{series_slug}/{episode_slug}',
                    DangalPlayIE, **self._extract_episode_info(ep, episode_slug, series_slug))

    def _real_extract(self, url):
        """Return a playlist of the season's episodes, or of one subcategory if the URL names it."""
        series_slug, subcategory = self._match_valid_url(url).group('id', 'sub')
        # Without an explicit subcategory in the URL, enumerate all of them from the season info
        subcategories = [subcategory] if subcategory else traverse_obj(
            self._call_api(
                f'catalogs/shows/items/{series_slug}.gzip', series_slug,
                'Downloading season info JSON', query={'item_language': ''}),
            ('data', 'subcategories', ..., 'friendly_id', {str}))

        return self.playlist_result(
            self._entries(subcategories, series_slug), join_nonempty(series_slug, subcategory))
|
||||
@@ -1,11 +1,11 @@
|
||||
import re
|
||||
import os.path
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
url_basename,
|
||||
remove_start,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_resolution,
|
||||
|
||||
@@ -2,9 +2,9 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
remove_end,
|
||||
|
||||
@@ -2,10 +2,10 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
unified_strdate,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import time
|
||||
import hashlib
|
||||
import time
|
||||
import urllib
|
||||
import uuid
|
||||
|
||||
|
||||
@@ -4,8 +4,8 @@ import uuid
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
remove_start,
|
||||
@@ -355,12 +355,10 @@ class DiscoveryPlusBaseIE(DPlayBaseIE):
|
||||
video_id, headers=headers, data=json.dumps({
|
||||
'deviceInfo': {
|
||||
'adBlocker': False,
|
||||
'drmSupported': False,
|
||||
},
|
||||
'videoId': video_id,
|
||||
'wisteriaProperties': {
|
||||
'platform': 'desktop',
|
||||
'product': self._PRODUCT,
|
||||
},
|
||||
'wisteriaProperties': {},
|
||||
}).encode('utf-8'))['data']['attributes']['streaming']
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -878,10 +876,31 @@ class DiscoveryPlusIndiaIE(DiscoveryPlusBaseIE):
|
||||
})
|
||||
|
||||
|
||||
class DiscoveryNetworksDeIE(DPlayBaseIE):
|
||||
class DiscoveryNetworksDeIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://dmax.de/sendungen/goldrausch-in-australien/german-gold',
|
||||
'info_dict': {
|
||||
'id': '4756322',
|
||||
'ext': 'mp4',
|
||||
'title': 'German Gold',
|
||||
'description': 'md5:f3073306553a8d9b40e6ac4cdbf09fc6',
|
||||
'display_id': 'goldrausch-in-australien/german-gold',
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
'season': 'Season 5',
|
||||
'season_number': 5,
|
||||
'series': 'Goldrausch in Australien',
|
||||
'duration': 2648.0,
|
||||
'upload_date': '20230517',
|
||||
'timestamp': 1684357500,
|
||||
'creators': ['DMAX'],
|
||||
'thumbnail': 'https://eu1-prod-images.disco-api.com/2023/05/09/f72fb510-7992-3b12-af7f-f16a2c22d1e3.jpeg',
|
||||
'tags': ['schatzsucher', 'schatz', 'nugget', 'bodenschätze', 'down under', 'australien', 'goldrausch'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100',
|
||||
'info_dict': {
|
||||
'id': '78867',
|
||||
@@ -901,9 +920,7 @@ class DiscoveryNetworksDeIE(DPlayBaseIE):
|
||||
'season_number': 1,
|
||||
'thumbnail': r're:https://.+\.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
'url': 'https://www.dmax.de/programme/dmax-highlights/video/tuning-star-sidney-hoffmann-exklusiv-bei-dmax/191023082312316',
|
||||
'only_matching': True,
|
||||
@@ -920,8 +937,14 @@ class DiscoveryNetworksDeIE(DPlayBaseIE):
|
||||
country = 'GB' if domain == 'dplay.co.uk' else 'DE'
|
||||
realm = 'questuk' if country == 'GB' else domain.replace('.', '')
|
||||
return self._get_disco_api_info(
|
||||
url, '%s/%s' % (programme, alternate_id),
|
||||
'sonic-eu1-prod.disco-api.com', realm, country)
|
||||
url, f'{programme}/{alternate_id}', 'eu1-prod.disco-api.com', realm, country)
|
||||
|
||||
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
    """Add the disco API realm, client and authorization headers to `headers` in place."""
    # The authorization value is resolved before `headers` is touched, so a
    # failure in `_get_auth` leaves the caller's dict unmodified.
    authorization = self._get_auth(disco_base, display_id, realm)
    disco_headers = {
        'x-disco-params': f'realm={realm}',
        'x-disco-client': 'Alps:HyogaPlayer:0.0.0',
        'Authorization': authorization,
    }
    headers.update(disco_headers)
|
||||
|
||||
|
||||
class DiscoveryPlusShowBaseIE(DPlayBaseIE):
|
||||
|
||||
@@ -2,8 +2,8 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
NO_DEFAULT,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
@@ -5,9 +5,9 @@ import urllib.parse
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
get_elements_by_class,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
|
||||
@@ -2,15 +2,15 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
from ..compat import compat_urlparse
|
||||
|
||||
|
||||
class DWIE(InfoExtractor):
|
||||
|
||||
@@ -4,15 +4,15 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
dict_get,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_qs,
|
||||
parse_age_limit,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
str_or_none,
|
||||
try_get,
|
||||
url_or_none,
|
||||
|
||||
@@ -8,7 +8,7 @@ from ..utils import (
|
||||
qualities,
|
||||
traverse_obj,
|
||||
unified_strdate,
|
||||
xpath_text
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
js_to_json,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
parse_duration,
|
||||
ExtractorError,
|
||||
parse_duration,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,12 +1,6 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
ExtractorError,
|
||||
try_get,
|
||||
mimetype2ext
|
||||
)
|
||||
from ..utils import ExtractorError, mimetype2ext, parse_iso8601, try_get
|
||||
|
||||
|
||||
class FancodeVodIE(InfoExtractor):
|
||||
|
||||
@@ -3,9 +3,9 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_etree_fromstring
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
|
||||
@@ -2,10 +2,10 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
qualities,
|
||||
strip_or_none,
|
||||
int_or_none,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ from ..utils import (
|
||||
parse_codecs,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
unified_timestamp
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
try_get
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
|
||||
107
yt_dlp/extractor/gbnews.py
Normal file
107
yt_dlp/extractor/gbnews.py
Normal file
@@ -0,0 +1,107 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
get_elements_html_by_class,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class GBNewsIE(InfoExtractor):
    """Extract GB News article clips and the live stream served through Simplestream."""

    IE_DESC = 'GB News clips, features and live streams'
    _VALID_URL = r'https?://(?:www\.)?gbnews\.(?:uk|com)/(?:\w+/)?(?P<id>[^#?]+)'

    # Value sent as the `platform` query parameter of the stream API request
    _PLATFORM = 'safari'
    # Endpoint queried to discover the API hostname for a player ID/environment
    _SSMP_URL = 'https://mm-v2.simplestream.com/ssmp/api.php'
    _TESTS = [{
        'url': 'https://www.gbnews.com/news/bbc-claudine-gay-harvard-university-antisemitism-row',
        'info_dict': {
            'id': '52264136',
            'ext': 'mp4',
            'thumbnail': r're:https?://www\.gbnews\.\w+/.+\.(?:jpe?g|png|webp)',
            'display_id': 'bbc-claudine-gay-harvard-university-antisemitism-row',
            'description': 'The post was criticised by former employers of the broadcaster',
            'title': 'BBC deletes post after furious backlash over headline downplaying antisemitism',
        },
    }, {
        'url': 'https://www.gbnews.com/royal/prince-harry-in-love-with-kate-meghan-markle-jealous-royal',
        'info_dict': {
            'id': '52328390',
            'ext': 'mp4',
            'thumbnail': r're:https?://www\.gbnews\.\w+/.+\.(?:jpe?g|png|webp)',
            'display_id': 'prince-harry-in-love-with-kate-meghan-markle-jealous-royal',
            'description': 'Ingrid Seward has published 17 books documenting the highs and lows of the Royal Family',
            'title': 'Royal author claims Prince Harry was \'in love\' with Kate - Meghan was \'jealous\'',
        },
    }, {
        'url': 'https://www.gbnews.uk/watchlive',
        'info_dict': {
            'id': '1069',
            'ext': 'mp4',
            'thumbnail': r're:https?://www\.gbnews\.\w+/.+\.(?:jpe?g|png|webp)',
            'display_id': 'watchlive',
            'live_status': 'is_live',
            'title': r're:^GB News Live',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # NOTE(review): lru_cache on an instance method makes `self` part of the cache key
    # and keeps the extractor instance referenced for as long as the entry is cached.
    @functools.lru_cache
    def _get_ss_endpoint(self, data_id, data_env):
        """Return the Simplestream API host for a player ID and environment.

        Falls back to 'GB003' / 'production' when the page does not provide
        the respective attribute. Raises ExtractorError if the response
        carries no valid `api_hostname` URL.
        """
        if not data_id:
            data_id = 'GB003'
        if not data_env:
            data_env = 'production'

        json_data = self._download_json(
            self._SSMP_URL, None, 'Downloading Simplestream JSON metadata', query={
                'id': data_id,
                'env': data_env,
            })
        meta_url = traverse_obj(json_data, ('response', 'api_hostname', {url_or_none}))
        if not meta_url:
            raise ExtractorError('No API host found')

        return meta_url

    def _real_extract(self, url):
        """Locate the Simplestream player element on the page and resolve its HLS stream."""
        display_id = self._match_id(url).rpartition('/')[2]
        webpage = self._download_webpage(url, display_id)

        # Keep the attributes of the last 'simplestream' element whose class
        # does not mention 'sidebar'
        video_data = None
        for html_tag in get_elements_html_by_class('simplestream', webpage):
            attributes = extract_attributes(html_tag)
            if 'sidebar' not in (attributes.get('class') or ''):
                video_data = attributes
        if not video_data:
            raise ExtractorError('Could not find video element', expected=True)

        # Validate the ID before spending a request on the endpoint lookup;
        # a missing attribute previously surfaced as a bare KeyError
        uvid = video_data.get('data-uvid')
        if not uvid:
            raise ExtractorError('Could not find video ID in player element')

        endpoint_url = self._get_ss_endpoint(video_data.get('data-id'), video_data.get('data-env'))

        video_type = video_data.get('data-type')
        if not video_type or video_type == 'vod':
            video_type = 'show'
        stream_data = self._download_json(
            f'{endpoint_url}/api/{video_type}/stream/{uvid}',
            uvid, 'Downloading stream JSON', query={
                'key': video_data.get('data-key'),
                'platform': self._PLATFORM,
            })
        if traverse_obj(stream_data, 'drm'):
            self.report_drm(uvid)

        # Fail with a readable message instead of handing None to the m3u8 extraction
        stream_url = traverse_obj(stream_data, ('response', 'stream', {url_or_none}))
        if not stream_url:
            raise ExtractorError('No stream URL found in API response')

        return {
            'id': uvid,
            'display_id': display_id,
            'title': self._og_search_title(webpage, default=None),
            'description': self._og_search_description(webpage, default=None),
            'formats': self._extract_m3u8_formats(stream_url, uvid, 'mp4'),
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'is_live': video_type == 'live',
        }
|
||||
@@ -4,7 +4,7 @@ import types
|
||||
import urllib.parse
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor # isort: split
|
||||
from .common import InfoExtractor
|
||||
from .commonprotocols import RtmpIE
|
||||
from .youtube import YoutubeIE
|
||||
from ..compat import compat_etree_fromstring
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
bool_or_none,
|
||||
ExtractorError,
|
||||
bool_or_none,
|
||||
dict_get,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
urlencode_postdata,
|
||||
|
||||
@@ -3,9 +3,9 @@ import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_qs,
|
||||
smuggle_url,
|
||||
|
||||
@@ -3,16 +3,16 @@ import re
|
||||
from .adobepass import AdobePassIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
determine_ext,
|
||||
parse_age_limit,
|
||||
remove_start,
|
||||
remove_end,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
ExtractorError,
|
||||
unified_timestamp,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
remove_end,
|
||||
remove_start,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
str_or_none,
|
||||
unified_timestamp,
|
||||
url_or_none
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
@@ -1,10 +1,7 @@
|
||||
import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
try_get
|
||||
)
|
||||
from ..utils import ExtractorError, try_get
|
||||
|
||||
|
||||
class GofileIE(InfoExtractor):
|
||||
|
||||
@@ -1,11 +1,8 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
try_get,
|
||||
url_or_none
|
||||
)
|
||||
|
||||
import json
|
||||
from ..utils import try_get, url_or_none
|
||||
|
||||
|
||||
class GoToStageIE(InfoExtractor):
|
||||
|
||||
@@ -2,11 +2,11 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
xpath_element,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
urljoin,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
KNOWN_EXTENSIONS,
|
||||
determine_ext,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_count,
|
||||
|
||||
@@ -4,8 +4,8 @@ from .common import InfoExtractor
|
||||
from ..networking import Request
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
try_get,
|
||||
|
||||
@@ -2,8 +2,8 @@ import hashlib
|
||||
import random
|
||||
import re
|
||||
|
||||
from ..compat import compat_urlparse, compat_b64decode
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_b64decode, compat_urlparse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
@@ -13,8 +13,6 @@ from ..utils import (
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class HuyaLiveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?P<id>[^/#?&]+)(?:\D|$)'
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError, str_or_none, traverse_obj, unified_strdate
|
||||
from ..compat import compat_str
|
||||
from ..utils import ExtractorError, str_or_none, traverse_obj, unified_strdate
|
||||
|
||||
|
||||
class IchinanaLiveIE(InfoExtractor):
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
from .bokecc import BokeCCBaseIE
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_urllib_parse_unquote,
|
||||
@@ -6,10 +7,9 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
update_url_query,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
)
|
||||
from .bokecc import BokeCCBaseIE
|
||||
|
||||
|
||||
class InfoQIE(BokeCCBaseIE):
|
||||
|
||||
@@ -3,12 +3,12 @@ import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
js_to_json,
|
||||
urlencode_postdata,
|
||||
ExtractorError,
|
||||
parse_qs,
|
||||
traverse_obj
|
||||
traverse_obj,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -4,20 +4,16 @@ import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urllib_parse_unquote
|
||||
)
|
||||
from .openload import PhantomJSwrapper
|
||||
from ..compat import compat_str, compat_urllib_parse_unquote, compat_urllib_parse_urlencode
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
decode_packed_codes,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
format_field,
|
||||
get_element_by_id,
|
||||
get_element_by_attribute,
|
||||
get_element_by_id,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
ohdave_rsa_encrypt,
|
||||
|
||||
@@ -1,12 +1,11 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
urljoin
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,23 +1,22 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .brightcove import BrightcoveNewIE
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
JSON_LD_RE,
|
||||
ExtractorError,
|
||||
base_url,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
get_element_by_class,
|
||||
JSON_LD_RE,
|
||||
merge_dicts,
|
||||
parse_duration,
|
||||
smuggle_url,
|
||||
try_get,
|
||||
url_or_none,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
import functools
|
||||
import urllib.parse
|
||||
import urllib.error
|
||||
import hashlib
|
||||
import json
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import hashlib
|
||||
import random
|
||||
|
||||
from ..compat import compat_str
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
@@ -9,9 +10,8 @@ from ..utils import (
|
||||
smuggle_url,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
unsmuggle_url
|
||||
unsmuggle_url,
|
||||
)
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
def _parse_japanese_date(text):
|
||||
|
||||
403
yt_dlp/extractor/jiocinema.py
Normal file
403
yt_dlp/extractor/jiocinema.py
Normal file
@@ -0,0 +1,403 @@
|
||||
import base64
|
||||
import itertools
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import string
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
jwt_decode_hs256,
|
||||
parse_age_limit,
|
||||
try_call,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class JioCinemaBaseIE(InfoExtractor):
    """Shared authentication and API plumbing for the JioCinema extractors.

    Session state (tokens, user ID, device ID) is stored on `JioCinemaBaseIE`
    itself rather than on instances, so that every JioCinema extractor sees
    the same values.
    """
    _NETRC_MACHINE = 'jiocinema'
    _GEO_BYPASS = False
    _ACCESS_TOKEN = None
    _REFRESH_TOKEN = None
    _GUEST_TOKEN = None
    _USER_ID = None
    _DEVICE_ID = None
    _API_HEADERS = {'Origin': 'https://www.jiocinema.com', 'Referer': 'https://www.jiocinema.com/'}
    _APP_NAME = {'appName': 'RJIL_JioCinema'}
    _APP_VERSION = {'appVersion': '5.0.0'}
    _API_SIGNATURES = 'o668nxgzwff'
    _METADATA_API_BASE = 'https://content-jiovoot.voot.com/psapi'
    _ACCESS_HINT = 'the `accessToken` from your browser local storage'
    _LOGIN_HINT = (
        'Log in with "-u phone -p <PHONE_NUMBER>" to authenticate with OTP, '
        f'or use "-u token -p <ACCESS_TOKEN>" to log in with {_ACCESS_HINT}. '
        'If you have previously logged in with yt-dlp and your session '
        'has been cached, you can use "-u device -p <DEVICE_ID>"')

    def _cache_token(self, token_type):
        """Store the access and/or refresh token in the cache, keyed by the device ID.

        `token_type` must be one of 'access', 'refresh' or 'all'.
        """
        assert token_type in ('access', 'refresh', 'all')
        if token_type in ('access', 'all'):
            self.cache.store(
                JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-access', JioCinemaBaseIE._ACCESS_TOKEN)
        if token_type in ('refresh', 'all'):
            self.cache.store(
                JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-refresh', JioCinemaBaseIE._REFRESH_TOKEN)

    def _call_api(self, url, video_id, note='Downloading API JSON', headers=None, data=None):
        """POST `data` as compact JSON to `url` and return the decoded JSON response.

        `headers` are merged over the JSON content headers and `_API_HEADERS`.
        Responses with status 400, 403 and 474 are returned rather than raised,
        so callers can inspect the error payload. Omitted `headers`/`data`
        behave like empty dicts.
        """
        return self._download_json(
            url, video_id, note, data=json.dumps(data or {}, separators=(',', ':')).encode(), headers={
                'Content-Type': 'application/json',
                'Accept': 'application/json',
                **self._API_HEADERS,
                **(headers or {}),
            }, expected_status=(400, 403, 474))

    def _call_auth_api(self, service, endpoint, note, headers=None, data=None):
        """Call `endpoint` of the given auth `service` through `_call_api`."""
        return self._call_api(
            f'https://auth-jiocinema.voot.com/{service}service/apis/v4/{endpoint}',
            None, note=note, headers=headers, data=data)

    def _refresh_token(self):
        """Exchange the refresh token for a new access token and cache the result.

        Raises ExtractorError if no refresh token or device ID is available.
        """
        if not JioCinemaBaseIE._REFRESH_TOKEN or not JioCinemaBaseIE._DEVICE_ID:
            raise ExtractorError('User token has expired', expected=True)
        response = self._call_auth_api(
            'token', 'refreshtoken', 'Refreshing token',
            headers={'accesstoken': self._ACCESS_TOKEN}, data={
                **self._APP_NAME,
                'deviceId': self._DEVICE_ID,
                'refreshToken': self._REFRESH_TOKEN,
                **self._APP_VERSION,
            })
        # Only re-cache the refresh token when the response carries a different one
        refresh_token = response.get('refreshTokenId')
        if refresh_token and refresh_token != JioCinemaBaseIE._REFRESH_TOKEN:
            JioCinemaBaseIE._REFRESH_TOKEN = refresh_token
            self._cache_token('refresh')
        JioCinemaBaseIE._ACCESS_TOKEN = response['authToken']
        self._cache_token('access')

    def _fetch_guest_token(self):
        """Generate a random device ID and obtain a guest token and user ID for it."""
        JioCinemaBaseIE._DEVICE_ID = ''.join(random.choices(string.digits, k=10))
        guest_token = self._call_auth_api(
            'token', 'guest', 'Downloading guest token', data={
                **self._APP_NAME,
                'deviceType': 'phone',
                'os': 'ios',
                'deviceId': self._DEVICE_ID,
                'freshLaunch': False,
                'adId': self._DEVICE_ID,
                **self._APP_VERSION,
            })
        # Store on the class like every other piece of session state: an instance
        # attribute here would shadow the `_USER_ID` that `_perform_login` later
        # sets on the class after a phone login.
        JioCinemaBaseIE._GUEST_TOKEN = guest_token['authToken']
        JioCinemaBaseIE._USER_ID = guest_token['userId']

    def _call_login_api(self, endpoint, guest_token, data, note):
        """Call the OTP login `endpoint` authenticated with the guest token.

        `guest_token` is the decoded guest JWT payload.
        """
        return self._call_auth_api(
            'user', f'loginotp/{endpoint}', note, headers={
                **self.geo_verification_headers(),
                'accesstoken': self._GUEST_TOKEN,
                **self._APP_NAME,
                # NOTE(review): 'data' and the dict are passed as two separate paths here,
                # not as one nested path - confirm that this is the intended lookup
                **traverse_obj(guest_token, 'data', {
                    'deviceType': ('deviceType', {str}),
                    'os': ('os', {str}),
                })}, data=data)

    def _is_token_expired(self, token):
        """Return True if `token` has no decodable `exp` or its `exp` is at least 180s in the past."""
        # NOTE(review): this subtracts the 180s margin from the current time, so a token
        # counts as valid until 180s after `exp` - confirm a look-ahead was not intended
        return (try_call(lambda: jwt_decode_hs256(token)['exp']) or 0) <= int(time.time() - 180)

    def _perform_login(self, username, password):
        """Log in using one of the pseudo-usernames 'token', 'device' or 'phone'.

        - 'token': `password` is an access token (JWT); a refresh token may be
          supplied through the `refresh_token` extractor argument
        - 'device': `password` is a device ID whose tokens were cached earlier
        - 'phone': `password` is a phone number; an OTP is requested and verified

        Raises ExtractorError for unusable credentials or a failed login step.
        """
        if self._ACCESS_TOKEN and not self._is_token_expired(self._ACCESS_TOKEN):
            return

        UUID_RE = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
        # Normalise once so that every check below treats the login type case-insensitively
        login_type = username.lower()

        if login_type == 'token':
            if try_call(lambda: jwt_decode_hs256(password)):
                JioCinemaBaseIE._ACCESS_TOKEN = password
                refresh_hint = 'the `refreshToken` UUID from your browser local storage'
                refresh_token = self._configuration_arg('refresh_token', [''], ie_key=JioCinemaIE)[0]
                if not refresh_token:
                    self.to_screen(
                        'To extend the life of your login session, in addition to your access token, '
                        'you can pass --extractor-args "jiocinema:refresh_token=REFRESH_TOKEN" '
                        f'where REFRESH_TOKEN is {refresh_hint}')
                elif re.fullmatch(UUID_RE, refresh_token):
                    JioCinemaBaseIE._REFRESH_TOKEN = refresh_token
                else:
                    self.report_warning(f'Invalid refresh_token value. Use {refresh_hint}')
            else:
                raise ExtractorError(
                    f'The password given could not be decoded as a token; use {self._ACCESS_HINT}', expected=True)

        elif login_type == 'device' and re.fullmatch(rf'(?:{UUID_RE}|\d+)', password):
            JioCinemaBaseIE._REFRESH_TOKEN = self.cache.load(JioCinemaBaseIE._NETRC_MACHINE, f'{password}-refresh')
            JioCinemaBaseIE._ACCESS_TOKEN = self.cache.load(JioCinemaBaseIE._NETRC_MACHINE, f'{password}-access')
            if not JioCinemaBaseIE._REFRESH_TOKEN or not JioCinemaBaseIE._ACCESS_TOKEN:
                raise ExtractorError(f'Failed to load cached tokens for device ID "{password}"', expected=True)

        elif login_type == 'phone' and re.fullmatch(r'\+?\d+', password):
            self._fetch_guest_token()
            guest_token = jwt_decode_hs256(self._GUEST_TOKEN)
            initial_data = {
                'number': base64.b64encode(password.encode()).decode(),
                **self._APP_VERSION,
            }
            response = self._call_login_api('send', guest_token, initial_data, 'Requesting OTP')
            if not traverse_obj(response, ('OTPInfo', {dict})):
                raise ExtractorError('There was a problem with the phone number login attempt')

            is_iphone = guest_token.get('os') == 'ios'
            response = self._call_login_api('verify', guest_token, {
                'deviceInfo': {
                    'consumptionDeviceName': 'iPhone' if is_iphone else 'Android',
                    'info': {
                        'platform': {'name': 'iPhone OS' if is_iphone else 'Android'},
                        'androidId': self._DEVICE_ID,
                        'type': 'iOS' if is_iphone else 'Android',
                    },
                },
                **initial_data,
                'otp': self._get_tfa_info('the one-time password sent to your phone'),
            }, 'Submitting OTP')
            if traverse_obj(response, 'code') == 1043:
                raise ExtractorError('Wrong OTP', expected=True)
            JioCinemaBaseIE._REFRESH_TOKEN = response['refreshToken']
            JioCinemaBaseIE._ACCESS_TOKEN = response['authToken']

        else:
            raise ExtractorError(self._LOGIN_HINT, expected=True)

        # The access token payload is the source of the user and device IDs
        user_token = jwt_decode_hs256(JioCinemaBaseIE._ACCESS_TOKEN)['data']
        JioCinemaBaseIE._USER_ID = user_token['userId']
        JioCinemaBaseIE._DEVICE_ID = user_token['deviceId']
        if JioCinemaBaseIE._REFRESH_TOKEN and login_type != 'device':
            self._cache_token('all')
            if self.get_param('cachedir') is not False:
                self.to_screen(
                    f'NOTE: For subsequent logins you can use "-u device -p {JioCinemaBaseIE._DEVICE_ID}"')
        elif not JioCinemaBaseIE._REFRESH_TOKEN:
            # No refresh token was supplied; try the one cached for this device ID
            JioCinemaBaseIE._REFRESH_TOKEN = self.cache.load(
                JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-refresh')
            if JioCinemaBaseIE._REFRESH_TOKEN:
                self._cache_token('access')
        self.to_screen(f'Logging in as device ID "{JioCinemaBaseIE._DEVICE_ID}"')
        if self._is_token_expired(JioCinemaBaseIE._ACCESS_TOKEN):
            self._refresh_token()
|
||||
|
||||
|
||||
class JioCinemaIE(JioCinemaBaseIE):
    """Extractor for single JioCinema movies and TV show episodes."""
    IE_NAME = 'jiocinema'
    _VALID_URL = r'https?://(?:www\.)?jiocinema\.com/?(?:movies?/[^/?#]+/|tv-shows/(?:[^/?#]+/){3})(?P<id>\d{3,})'
    _TESTS = [{
        'url': 'https://www.jiocinema.com/tv-shows/agnisakshi-ek-samjhauta/1/pradeep-to-stop-the-wedding/3759931',
        'info_dict': {
            'id': '3759931',
            'ext': 'mp4',
            'title': 'Pradeep to stop the wedding?',
            'description': 'md5:75f72d1d1a66976633345a3de6d672b1',
            'episode': 'Pradeep to stop the wedding?',
            'episode_number': 89,
            'season': 'Agnisakshi…Ek Samjhauta-S1',
            'season_number': 1,
            'series': 'Agnisakshi Ek Samjhauta',
            'duration': 1238.0,
            'thumbnail': r're:https?://.+\.jpg',
            'age_limit': 13,
            'season_id': '3698031',
            'upload_date': '20230606',
            'timestamp': 1686009600,
            'release_date': '20230607',
            'genres': ['Drama'],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.jiocinema.com/movies/bhediya/3754021/watch',
        'info_dict': {
            'id': '3754021',
            'ext': 'mp4',
            'title': 'Bhediya',
            'description': 'md5:a6bf2900371ac2fc3f1447401a9f7bb0',
            'episode': 'Bhediya',
            'duration': 8500.0,
            'thumbnail': r're:https?://.+\.jpg',
            'age_limit': 13,
            'upload_date': '20230525',
            'timestamp': 1685026200,
            'release_date': '20230524',
            'genres': ['Comedy'],
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _extract_formats_and_subtitles(self, playback, video_id):
        """Return a dict with the HLS `formats` and `subtitles` found in the playback JSON."""
        hls_url = traverse_obj(playback, (
            'data', 'playbackUrls', lambda _, v: v['streamtype'] == 'hls', 'url', {url_or_none}, any))
        if not hls_url:  # DRM-only content only serves dash urls
            self.report_drm(video_id)
        all_formats, subtitles = self._extract_m3u8_formats_and_subtitles(hls_url, video_id, m3u8_id='hls')
        self._remove_duplicate_formats(all_formats)

        # '/_definst_/smil:vod/' m3u8 manifests claim to have 720p+ formats but max out at 480p
        usable_formats = traverse_obj(all_formats, (
            lambda _, v: '/_definst_/smil:vod/' not in v['url'] or v['height'] <= 480))

        return {
            'formats': usable_formats,
            'subtitles': subtitles,
        }

    def _real_extract(self, url):
        """Request the playback and asset metadata for the video and build its info dict."""
        video_id = self._match_id(url)

        # Logged-in sessions refresh their access token; anonymous ones get a guest token
        if self._ACCESS_TOKEN:
            if self._is_token_expired(self._ACCESS_TOKEN):
                self._refresh_token()
        elif self._is_token_expired(self._GUEST_TOKEN):
            self._fetch_guest_token()

        playback_request = {
            '4k': False,
            'ageGroup': '18+',
            'appVersion': '3.4.0',
            'bitrateProfile': 'xhdpi',
            'capability': {
                'drmCapability': {
                    'aesSupport': 'yes',
                    'fairPlayDrmSupport': 'none',
                    'playreadyDrmSupport': 'none',
                    'widevineDRMSupport': 'none',
                },
                'frameRateCapability': [{
                    'frameRateSupport': '30fps',
                    'videoQuality': '1440p',
                }],
            },
            'continueWatchingRequired': False,
            'dolby': False,
            'downloadRequest': False,
            'hevc': False,
            'kidsSafe': False,
            'manufacturer': 'Windows',
            'model': 'Windows',
            'multiAudioRequired': True,
            'osVersion': '10',
            'parentalPinValid': True,
            'x-apisignatures': self._API_SIGNATURES,
        }
        playback = self._call_api(
            f'https://apis-jiovoot.voot.com/playbackjv/v3/{video_id}', video_id,
            'Downloading playback JSON', headers={
                **self.geo_verification_headers(),
                'accesstoken': self._ACCESS_TOKEN or self._GUEST_TOKEN,
                **self._APP_NAME,
                'deviceid': self._DEVICE_ID,
                'uniqueid': self._USER_ID,
                'x-apisignatures': self._API_SIGNATURES,
                'x-platform': 'androidweb',
                'x-platform-token': 'web',
            }, data=playback_request)

        # The API reports failures through a `code` field in the JSON body
        status_code = traverse_obj(playback, ('code', {int}))
        if status_code == 474:
            self.raise_geo_restricted(countries=['IN'])
        elif status_code == 1008:
            error_msg = 'This content is only available for premium users'
            if self._ACCESS_TOKEN:
                raise ExtractorError(error_msg, expected=True)
            self.raise_login_required(f'{error_msg}. {self._LOGIN_HINT}', method=None)
        elif status_code == 400:
            raise ExtractorError('The requested content is not available', expected=True)
        elif status_code is not None and status_code != 200:
            raise ExtractorError(
                f'JioCinema says: {traverse_obj(playback, ("message", {str})) or status_code}')

        # Optional: extraction continues with the playback metadata if this request fails
        metadata = self._download_json(
            f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/query/asset-details',
            video_id, fatal=False, query={
                'ids': f'include:{video_id}',
                'responseType': 'common',
                'devicePlatformType': 'desktop',
            })

        # fallback metadata
        playback_info = traverse_obj(playback, ('data', {
            'title': ('name', {str}),
            'description': ('fullSynopsis', {str}),
            'series': ('show', 'name', {str}, {lambda x: x or None}),
            'season': ('tournamentName', {str}, {lambda x: x if x != 'Season 0' else None}),
            'season_number': ('episode', 'season', {int_or_none}, {lambda x: x or None}),
            'episode': ('fullTitle', {str}),
            'episode_number': ('episode', 'episodeNo', {int_or_none}, {lambda x: x or None}),
            'age_limit': ('ageNemonic', {parse_age_limit}),
            'duration': ('totalDuration', {float_or_none}),
            'thumbnail': ('images', {url_or_none}),
        }))
        # Merged last, so these values take precedence over the playback fallbacks
        asset_info = traverse_obj(metadata, ('result', 0, {
            'title': ('fullTitle', {str}),
            'description': ('fullSynopsis', {str}),
            'series': ('showName', {str}, {lambda x: x or None}),
            'season': ('seasonName', {str}, {lambda x: x or None}),
            'season_number': ('season', {int_or_none}),
            'season_id': ('seasonId', {str}, {lambda x: x or None}),
            'episode': ('fullTitle', {str}),
            'episode_number': ('episode', {int_or_none}),
            'timestamp': ('uploadTime', {int_or_none}),
            'release_date': ('telecastDate', {str}),
            'age_limit': ('ageNemonic', {parse_age_limit}),
            'duration': ('duration', {float_or_none}),
            'genres': ('genres', ..., {str}),
            'thumbnail': ('seo', 'ogImage', {url_or_none}),
        }))

        return {
            'id': video_id,
            'http_headers': self._API_HEADERS,
            **self._extract_formats_and_subtitles(playback, video_id),
            **playback_info,
            **asset_info,
        }
|
||||
|
||||
|
||||
class JioCinemaSeriesIE(JioCinemaBaseIE):
    """Playlist extractor that lists every episode of a JioCinema TV show."""
    IE_NAME = 'jiocinema:series'
    _VALID_URL = r'https?://(?:www\.)?jiocinema\.com/tv-shows/(?P<slug>[\w-]+)/(?P<id>\d{3,})'
    _TESTS = [{
        'url': 'https://www.jiocinema.com/tv-shows/naagin/3499917',
        'info_dict': {
            'id': '3499917',
            'title': 'naagin',
        },
        'playlist_mincount': 120,
    }]

    def _entries(self, series_id):
        """Yield a URL result for each episode, walking the seasons page by page."""
        season_listing = self._download_json(
            f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/season-by-show', series_id,
            'Downloading series metadata JSON', query={
                'sort': 'season:asc',
                'id': series_id,
                'responseType': 'common',
            })
        seasons = traverse_obj(season_listing, ('result', lambda _, v: v['id']))

        episodes_url = f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/series-wise-episode'
        for position, season in enumerate(seasons, 1):
            season_id = season['id']
            # Used in progress messages only; falls back to the 1-based position
            label = season.get('season') or position
            for page_num in itertools.count(1):
                page = self._download_json(
                    episodes_url, season_id, f'Downloading season {label} page {page_num} JSON', query={
                        'sort': 'episode:asc',
                        'id': season_id,
                        'responseType': 'common',
                        'page': page_num,
                    })
                # Keep only episodes that have an ID and whose slug is a valid URL
                episodes = traverse_obj(page, ('result', lambda _, v: v['id'] and url_or_none(v['slug'])))
                if not episodes:
                    # An empty page marks the end of the season
                    break
                for episode in episodes:
                    episode_info = traverse_obj(episode, {
                        'video_id': 'id',
                        'video_title': ('fullTitle', {str}),
                        'season_number': ('season', {int_or_none}),
                        'episode_number': ('episode', {int_or_none}),
                    })
                    yield self.url_result(episode['slug'], JioCinemaIE, **episode_info)

    def _real_extract(self, url):
        """Return a playlist for the series, using the URL slug as its title."""
        slug, series_id = self._match_valid_url(url).group('slug', 'id')
        return self.playlist_result(self._entries(series_id), series_id, slug)
|
||||
@@ -1,8 +1,5 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unified_strdate
|
||||
)
|
||||
from ..utils import ExtractorError, unified_strdate
|
||||
|
||||
|
||||
class JoveIE(InfoExtractor):
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import base64
|
||||
import re
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
|
||||
@@ -3,8 +3,8 @@ from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
@@ -4,18 +4,18 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
compat_parse_qs,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
format_field,
|
||||
int_or_none,
|
||||
unsmuggle_url,
|
||||
remove_start,
|
||||
smuggle_url,
|
||||
traverse_obj,
|
||||
remove_start
|
||||
unsmuggle_url,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import time
|
||||
import hashlib
|
||||
import random
|
||||
import string
|
||||
import hashlib
|
||||
import time
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
@@ -3,10 +3,10 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
get_element_by_id,
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
InAdvancePagedList,
|
||||
clean_html,
|
||||
get_element_by_id,
|
||||
remove_start,
|
||||
)
|
||||
|
||||
|
||||
@@ -1,9 +1,25 @@
|
||||
from .common import InfoExtractor
|
||||
from .wat import WatIE
|
||||
from ..utils import ExtractorError, int_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class LCIIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:lci|tf1info)\.fr/[^/]+/[\w-]+-(?P<id>\d+)\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:lci|tf1info)\.fr/(?:[^/?#]+/)+[\w-]+-(?P<id>\d+)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.tf1info.fr/replay-lci/videos/video-24h-pujadas-du-vendredi-24-mai-6708-2300831.html',
|
||||
'info_dict': {
|
||||
'id': '14113788',
|
||||
'ext': 'mp4',
|
||||
'title': '24H Pujadas du vendredi 24 mai 2024',
|
||||
'thumbnail': 'https://photos.tf1.fr/1280/720/24h-pujadas-du-24-mai-2024-55bf2d-0@1x.jpg',
|
||||
'upload_date': '20240524',
|
||||
'duration': 6158,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.tf1info.fr/politique/election-presidentielle-2022-second-tour-j-2-marine-le-pen-et-emmanuel-macron-en-interview-de-lci-vendredi-soir-2217486.html',
|
||||
'info_dict': {
|
||||
'id': '13875948',
|
||||
@@ -24,5 +40,10 @@ class LCIIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
wat_id = self._search_regex(r'watId["\']?\s*:\s*["\']?(\d+)', webpage, 'wat id')
|
||||
return self.url_result('wat:' + wat_id, 'Wat', wat_id)
|
||||
next_data = self._search_nextjs_data(webpage, video_id)
|
||||
wat_id = traverse_obj(next_data, (
|
||||
'props', 'pageProps', 'page', 'tms', 'videos', {dict.keys}, ..., {int_or_none}, any))
|
||||
if wat_id is None:
|
||||
raise ExtractorError('Could not find wat_id')
|
||||
|
||||
return self.url_result(f'wat:{wat_id}', WatIE, str(wat_id))
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from .common import InfoExtractor
|
||||
from .arkena import ArkenaIE
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class LcpPlayIE(ArkenaIE): # XXX: Do not subclass from concrete IE
|
||||
|
||||
@@ -4,8 +4,8 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
determine_protocol,
|
||||
parse_duration,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -2,9 +2,9 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
|
||||
@@ -11,9 +11,9 @@ from ..compat import (
|
||||
compat_urllib_parse_urlencode,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
encode_data_uri,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
parse_iso8601,
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user