mirror of https://github.com/yt-dlp/yt-dlp.git synced 2026-02-17 14:05:48 +00:00

Merge branch 'yt-dlp:master' into pr/live-sections

This commit is contained in:
bashonly
2024-05-28 13:22:13 -05:00
295 changed files with 3573 additions and 1913 deletions

File diff suppressed because it is too large

View File

@@ -6,10 +6,10 @@ import time
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
dict_get,
ExtractorError,
js_to_json,
dict_get,
int_or_none,
js_to_json,
parse_iso8601,
str_or_none,
traverse_obj,

View File

@@ -12,20 +12,21 @@ import urllib.parse
import urllib.request
import urllib.response
import uuid
from ..utils.networking import clean_proxies
from .common import InfoExtractor
from ..aes import aes_ecb_decrypt
from ..utils import (
ExtractorError,
OnDemandPagedList,
bytes_to_intlist,
decode_base_n,
int_or_none,
intlist_to_bytes,
OnDemandPagedList,
time_seconds,
traverse_obj,
update_url_query,
)
from ..utils.networking import clean_proxies
def add_opener(ydl, handler): # FIXME: Create proper API in .networking

View File

@@ -3,10 +3,10 @@ from ..utils import (
float_or_none,
format_field,
int_or_none,
str_or_none,
traverse_obj,
parse_codecs,
parse_qs,
str_or_none,
traverse_obj,
)

View File

@@ -10,18 +10,18 @@ from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
from ..compat import compat_b64decode
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
ass_subtitles_timecode,
bytes_to_intlist,
bytes_to_long,
ExtractorError,
float_or_none,
int_or_none,
intlist_to_bytes,
long_to_bytes,
parse_iso8601,
pkcs1pad,
strip_or_none,
str_or_none,
strip_or_none,
try_get,
unified_strdate,
urlencode_postdata,

View File

@@ -4,11 +4,11 @@ import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ISO639Utils,
OnDemandPagedList,
float_or_none,
int_or_none,
ISO639Utils,
join_nonempty,
OnDemandPagedList,
parse_duration,
str_or_none,
str_to_int,

View File

@@ -5,7 +5,7 @@ from ..utils import (
int_or_none,
mimetype2ext,
parse_iso8601,
traverse_obj
traverse_obj,
)

View File

@@ -12,7 +12,6 @@ from ..utils import (
)
from ..utils.traversal import traverse_obj
_FIELDS = '''
_id
clipImageSource

View File

@@ -1,9 +1,9 @@
from .common import InfoExtractor
from ..utils import (
parse_iso8601,
int_or_none,
parse_duration,
parse_filesize,
int_or_none,
parse_iso8601,
)

View File

@@ -1,17 +1,13 @@
import re
from .common import InfoExtractor
from ..compat import (
compat_urlparse,
)
from ..compat import compat_urlparse
from ..utils import (
ExtractorError,
clean_html,
int_or_none,
urlencode_postdata,
urljoin,
int_or_none,
clean_html,
ExtractorError
)

View File

@@ -1,6 +1,6 @@
from .common import InfoExtractor
from .youtube import YoutubeIE
from .vimeo import VimeoIE
from .youtube import YoutubeIE
from ..utils import (
int_or_none,
parse_iso8601,

View File

@@ -1,7 +1,7 @@
from .common import InfoExtractor
from ..utils import (
determine_ext,
ExtractorError,
determine_ext,
int_or_none,
mimetype2ext,
parse_iso8601,

View File

@@ -5,7 +5,7 @@ from ..utils import (
int_or_none,
str_or_none,
traverse_obj,
unified_timestamp
unified_timestamp,
)

View File

@@ -1,7 +1,7 @@
import re
from .common import InfoExtractor
from ..utils import url_or_none, merge_dicts
from ..utils import merge_dicts, url_or_none
class AngelIE(InfoExtractor):

View File

@@ -1,8 +1,5 @@
from .common import InfoExtractor
from ..utils import (
str_to_int,
ExtractorError
)
from ..utils import ExtractorError, str_to_int
class AppleConnectIE(InfoExtractor):

View File

@@ -1,5 +1,5 @@
import re
import json
import re
from .common import InfoExtractor
from ..compat import compat_urlparse

View File

@@ -4,8 +4,8 @@ from ..compat import (
compat_urllib_parse_urlparse,
)
from ..utils import (
format_field,
float_or_none,
format_field,
int_or_none,
parse_iso8601,
remove_start,

View File

@@ -2,10 +2,10 @@ import datetime as dt
from .common import InfoExtractor
from ..utils import (
ExtractorError,
float_or_none,
jwt_encode_hs256,
try_get,
ExtractorError,
)

View File

@@ -2,8 +2,8 @@ import base64
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_urlencode,
compat_str,
compat_urllib_parse_urlencode,
)
from ..utils import (
format_field,

View File

@@ -2,12 +2,12 @@ import math
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_urlparse,
compat_parse_qs,
compat_urllib_parse_urlparse,
)
from ..utils import (
format_field,
InAdvancePagedList,
format_field,
traverse_obj,
unified_timestamp,
)

View File

@@ -2,11 +2,11 @@ import json
from .common import InfoExtractor
from ..utils import (
try_get,
int_or_none,
url_or_none,
float_or_none,
int_or_none,
try_get,
unified_timestamp,
url_or_none,
)

View File

@@ -1,5 +1,4 @@
from .common import InfoExtractor
from ..utils import (
int_or_none,
str_or_none,

View File

@@ -1,5 +1,5 @@
from .common import InfoExtractor
from .amp import AMPIE
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,

View File

@@ -1,3 +1,4 @@
from .common import InfoExtractor
from ..utils import (
mimetype2ext,
parse_duration,
@@ -5,7 +6,6 @@ from ..utils import (
str_or_none,
traverse_obj,
)
from .common import InfoExtractor
class BloggerIE(InfoExtractor):

View File

@@ -1,7 +1,6 @@
import re
from .common import InfoExtractor
from ..utils import (
extract_attributes,
)

View File

@@ -1,9 +1,5 @@
from .common import InfoExtractor
from ..utils import (
js_to_json,
traverse_obj,
unified_timestamp
)
from ..utils import js_to_json, traverse_obj, unified_timestamp
class BoxCastVideoIE(InfoExtractor):

View File

@@ -6,7 +6,7 @@ from ..utils import (
classproperty,
int_or_none,
traverse_obj,
urljoin
urljoin,
)

View File

@@ -12,10 +12,11 @@ from ..compat import (
)
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
UnsupportedError,
clean_html,
dict_get,
extract_attributes,
ExtractorError,
find_xpath_attr,
fix_xml_ampersands,
float_or_none,
@@ -29,7 +30,6 @@ from ..utils import (
try_get,
unescapeHTML,
unsmuggle_url,
UnsupportedError,
update_url_query,
url_or_none,
)

View File

@@ -27,8 +27,17 @@ class BrilliantpalaBaseIE(InfoExtractor):
r'"username"\s*:\s*"(?P<username>[^"]+)"', webpage, 'logged-in username')
def _perform_login(self, username, password):
login_form = self._hidden_inputs(self._download_webpage(
self._LOGIN_API, None, 'Downloading login page'))
login_page, urlh = self._download_webpage_handle(
self._LOGIN_API, None, 'Downloading login page', expected_status=401)
if urlh.status != 401 and not urlh.url.startswith(self._LOGIN_API):
self.write_debug('Cookies are valid, no login required.')
return
if urlh.status == 401:
self.write_debug('Got HTTP Error 401; cookies have been invalidated')
login_page = self._download_webpage(self._LOGIN_API, None, 'Re-downloading login page')
login_form = self._hidden_inputs(login_page)
login_form.update({
'username': username,
'password': password,

View File

@@ -0,0 +1,74 @@
from .common import InfoExtractor
from ..utils import (
determine_ext,
int_or_none,
parse_iso8601,
traverse_obj,
urljoin,
)
class CaffeineTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?caffeine\.tv/[^/?#]+/video/(?P<id>[\da-f-]+)'
_TESTS = [{
'url': 'https://www.caffeine.tv/TsuSurf/video/cffc0a00-e73f-11ec-8080-80017d29f26e',
'info_dict': {
'id': 'cffc0a00-e73f-11ec-8080-80017d29f26e',
'ext': 'mp4',
'title': 'GOOOOD MORNINNNNN #highlights',
'timestamp': 1654702180,
'upload_date': '20220608',
'uploader': 'RahJON Wicc',
'uploader_id': 'TsuSurf',
'duration': 3145,
'age_limit': 17,
'thumbnail': 'https://www.caffeine.tv/broadcasts/776b6f84-9cd5-42e3-af1d-4a776eeed697/replay/lobby.jpg',
'comment_count': int,
'view_count': int,
'like_count': int,
'tags': ['highlights', 'battlerap'],
},
'params': {
'skip_download': 'm3u8',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
json_data = self._download_json(
f'https://api.caffeine.tv/social/public/activity/{video_id}', video_id)
broadcast_info = traverse_obj(json_data, ('broadcast_info', {dict})) or {}
video_url = broadcast_info['video_url']
ext = determine_ext(video_url)
if ext == 'm3u8':
formats = self._extract_m3u8_formats(video_url, video_id, 'mp4')
else:
formats = [{'url': video_url}]
return {
'id': video_id,
'formats': formats,
**traverse_obj(json_data, {
'like_count': ('like_count', {int_or_none}),
'view_count': ('view_count', {int_or_none}),
'comment_count': ('comment_count', {int_or_none}),
'tags': ('tags', ..., {str}, {lambda x: x or None}),
'uploader': ('user', 'name', {str}),
'uploader_id': (((None, 'user'), 'username'), {str}, any),
'is_live': ('is_live', {bool}),
}),
**traverse_obj(broadcast_info, {
'title': ('broadcast_title', {str}),
'duration': ('content_duration', {int_or_none}),
'timestamp': ('broadcast_start_time', {parse_iso8601}),
'thumbnail': ('preview_image_path', {lambda x: urljoin(url, x)}),
}),
'age_limit': {
# assume Apple Store ratings: https://en.wikipedia.org/wiki/Mobile_software_content_rating_system
'FOUR_PLUS': 0,
'NINE_PLUS': 9,
'TWELVE_PLUS': 12,
'SEVENTEEN_PLUS': 17,
}.get(broadcast_info.get('content_rating'), 17),
}
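
Note: the new extractor leans entirely on traverse_obj for metadata mapping. A minimal illustration of the path features it uses — {type} filters, `...` branching, and the `any` key that returns the first non-None branch — with made-up sample data:

from yt_dlp.utils import traverse_obj

data = {
    'user': {'name': 'RahJON Wicc', 'username': 'TsuSurf'},
    'tags': ['highlights', '', 'battlerap'],
}
traverse_obj(data, ('user', 'name', {str}))                      # 'RahJON Wicc'
traverse_obj(data, ('tags', ..., {str}, {lambda x: x or None}))  # ['highlights', 'battlerap']
traverse_obj(data, (((None, 'user'), 'username'), {str}, any))   # 'TsuSurf'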

View File

@@ -5,14 +5,14 @@ from .youtube import YoutubeIE
from ..utils import (
ExtractorError,
extract_attributes,
find_xpath_attr,
get_element_html_by_id,
int_or_none,
find_xpath_attr,
smuggle_url,
xpath_element,
xpath_text,
update_url_query,
url_or_none,
xpath_element,
xpath_text,
)

View File

@@ -101,7 +101,7 @@ class CeskaTelevizeIE(InfoExtractor):
site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize')
playlist_title = self._og_search_title(webpage, default=None)
if site_name and playlist_title:
playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, 1)[0]
playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, maxsplit=1)[0]
playlist_description = self._og_search_description(webpage, default=None)
if playlist_description:
playlist_description = playlist_description.replace('\xa0', ' ')
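
Note: the maxsplit change here (and in common.py below) is not cosmetic — passing maxsplit positionally to re.split() has emitted a DeprecationWarning since Python 3.13, so the keyword form is the future-proof spelling:

import re

title = 'Pořad — Česká televize'
re.split(r'\s*[—|]\s*', title, maxsplit=1)[0]  # 'Pořad'
# re.split(r'\s*[—|]\s*', title, 1) still works, but warns on Python 3.13+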

View File

@@ -1,4 +1,5 @@
import json
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (

View File

@@ -1,11 +1,11 @@
import re
from .common import InfoExtractor
from ..utils import (
parse_iso8601,
qualities,
)
import re
class ClippitIE(InfoExtractor):

View File

@@ -1,5 +1,6 @@
import base64
import collections
import functools
import getpass
import hashlib
import http.client
@@ -21,7 +22,6 @@ import urllib.parse
import urllib.request
import xml.etree.ElementTree
from ..compat import functools # isort: split
from ..compat import (
compat_etree_fromstring,
compat_expanduser,
@@ -2451,7 +2451,7 @@ class InfoExtractor:
})
continue
src_url = src if src.startswith('http') else urllib.parse.urljoin(base, src)
src_url = src if src.startswith('http') else urllib.parse.urljoin(f'{base}/', src)
src_url = src_url.strip()
if proto == 'm3u8' or src_ext == 'm3u8':
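
Note: the f'{base}/' change above exploits how urljoin resolves relative references — without a trailing slash, the last path segment of the base is replaced rather than appended to:

import urllib.parse

urllib.parse.urljoin('https://host/smil/video', 'media_b1.m3u8')
# -> 'https://host/smil/media_b1.m3u8'   (last segment replaced)
urllib.parse.urljoin('https://host/smil/video/', 'media_b1.m3u8')
# -> 'https://host/smil/video/media_b1.m3u8'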
@@ -3398,23 +3398,16 @@ class InfoExtractor:
return formats
def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
mobj = re.search(
r'''(?s)jwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?!</script>).*?\.\s*setup\s*\(\s*(?P<options>(?:\([^)]*\)|[^)])+)\s*\)''',
webpage)
if mobj:
try:
jwplayer_data = self._parse_json(mobj.group('options'),
video_id=video_id,
transform_source=transform_source)
except ExtractorError:
pass
else:
if isinstance(jwplayer_data, dict):
return jwplayer_data
return self._search_json(
r'''(?<!-)\bjwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?:(?!</script>).)*?\.\s*(?:setup\s*\(|(?P<load>load)\s*\(\s*\[)''',
webpage, 'JWPlayer data', video_id,
# must be a {...} or sequence, ending
contains_pattern=r'\{(?s:.*)}(?(load)(?:\s*,\s*\{(?s:.*)})*)', end_pattern=r'(?(load)\]|\))',
transform_source=transform_source, default=None)
def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
def _extract_jwplayer_data(self, webpage, video_id, *args, transform_source=js_to_json, **kwargs):
jwplayer_data = self._find_jwplayer_data(
webpage, video_id, transform_source=js_to_json)
webpage, video_id, transform_source=transform_source)
return self._parse_jwplayer_data(
jwplayer_data, video_id, *args, **kwargs)
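
Note: the rewritten _find_jwplayer_data delegates brace-matching to _search_json; the (?(load)...) syntax in its patterns is a regex conditional, which matches one alternative only if the named group participated in the match. A self-contained example of the construct:

import re

# (?(name)then|else): require a closing ')' only if the optional
# 'setup(' prefix actually matched
pattern = r'(?P<call>setup\()?\d+(?(call)\))'
re.match(pattern, 'setup(42)').group(0)  # 'setup(42)'
re.match(pattern, '42').group(0)         # '42'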
@@ -3446,22 +3439,14 @@ class InfoExtractor:
mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)
subtitles = {}
tracks = video_data.get('tracks')
if tracks and isinstance(tracks, list):
for track in tracks:
if not isinstance(track, dict):
continue
track_kind = track.get('kind')
if not track_kind or not isinstance(track_kind, str):
continue
if track_kind.lower() not in ('captions', 'subtitles'):
continue
track_url = urljoin(base_url, track.get('file'))
if not track_url:
continue
subtitles.setdefault(track.get('label') or 'en', []).append({
'url': self._proto_relative_url(track_url)
})
for track in traverse_obj(video_data, (
'tracks', lambda _, v: v['kind'].lower() in ('captions', 'subtitles'))):
track_url = urljoin(base_url, track.get('file'))
if not track_url:
continue
subtitles.setdefault(track.get('label') or 'en', []).append({
'url': self._proto_relative_url(track_url)
})
entry = {
'id': this_video_id,
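
Note: the replaced isinstance()/type-checking loop becomes a single traverse_obj path — a callable path key receives (key, value) pairs, and entries for which it raises (wrong type, missing 'kind') are simply skipped. Roughly:

from yt_dlp.utils import traverse_obj

video_data = {'tracks': [
    {'kind': 'captions', 'file': 'en.vtt'},
    {'kind': 'thumbnails', 'file': 'skip.vtt'},
    'not-a-dict',  # would have failed the old isinstance() check
]}
traverse_obj(video_data, (
    'tracks', lambda _, v: v['kind'].lower() in ('captions', 'subtitles')))
# -> [{'kind': 'captions', 'file': 'en.vtt'}]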
@@ -3546,7 +3531,7 @@ class InfoExtractor:
# See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
# of jwplayer.flash.swf
rtmp_url_parts = re.split(
r'((?:mp4|mp3|flv):)', source_url, 1)
r'((?:mp4|mp3|flv):)', source_url, maxsplit=1)
if len(rtmp_url_parts) == 3:
rtmp_url, prefix, play_path = rtmp_url_parts
a_format.update({

View File

@@ -1,7 +1,7 @@
from .theplatform import ThePlatformFeedIE
from ..utils import (
dict_get,
ExtractorError,
dict_get,
float_or_none,
int_or_none,
)

View File

@@ -6,6 +6,7 @@ import time
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
determine_ext,
float_or_none,
int_or_none,
@@ -13,7 +14,6 @@ from ..utils import (
parse_age_limit,
parse_duration,
url_or_none,
ExtractorError
)

View File

@@ -1,10 +1,12 @@
import re
from .common import InfoExtractor
from .senategov import SenateISVPIE
from .ustream import UstreamIE
from ..compat import compat_HTMLParseError
from ..utils import (
determine_ext,
ExtractorError,
determine_ext,
extract_attributes,
find_xpath_attr,
get_element_by_attribute,
@@ -19,8 +21,6 @@ from ..utils import (
str_to_int,
unescapeHTML,
)
from .senategov import SenateISVPIE
from .ustream import UstreamIE
class CSpanIE(InfoExtractor):

View File

@@ -1,6 +1,6 @@
from .common import InfoExtractor
from ..utils import unified_timestamp
from .youtube import YoutubeIE
from ..utils import unified_timestamp
class CtsNewsIE(InfoExtractor):

View File

@@ -1,8 +1,8 @@
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
determine_protocol,
int_or_none,
try_get,
unescapeHTML,
)

View File

@@ -1,8 +1,8 @@
import re
from .common import InfoExtractor
from ..utils import ExtractorError, clean_html, int_or_none, try_get, unified_strdate
from ..compat import compat_str
from ..utils import ExtractorError, clean_html, int_or_none, try_get, unified_strdate
class DamtomoBaseIE(InfoExtractor):

View File

@@ -0,0 +1,197 @@
import hashlib
import json
import re
import time
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import ExtractorError, int_or_none, join_nonempty, url_or_none
from ..utils.traversal import traverse_obj
class DangalPlayBaseIE(InfoExtractor):
_NETRC_MACHINE = 'dangalplay'
_OTV_USER_ID = None
_LOGIN_HINT = 'Pass credentials as -u "token" -p "USER_ID" where USER_ID is the `otv_user_id` in browser local storage'
_API_BASE = 'https://ottapi.dangalplay.com'
_AUTH_TOKEN = 'jqeGWxRKK7FK5zEk3xCM' # from https://www.dangalplay.com/main.48ad19e24eb46acccef3.js
_SECRET_KEY = 'f53d31a4377e4ef31fa0' # same as above
def _perform_login(self, username, password):
if self._OTV_USER_ID:
return
if username != 'token' or not re.fullmatch(r'[\da-f]{32}', password):
raise ExtractorError(self._LOGIN_HINT, expected=True)
self._OTV_USER_ID = password
def _real_initialize(self):
if not self._OTV_USER_ID:
self.raise_login_required(f'Login required. {self._LOGIN_HINT}', method=None)
def _extract_episode_info(self, metadata, episode_slug, series_slug):
return {
'display_id': episode_slug,
'episode_number': int_or_none(self._search_regex(
r'ep-(?:number-)?(\d+)', episode_slug, 'episode number', default=None)),
'season_number': int_or_none(self._search_regex(
r'season-(\d+)', series_slug, 'season number', default='1')),
'series': series_slug,
**traverse_obj(metadata, {
'id': ('content_id', {str}),
'title': ('display_title', {str}),
'episode': ('title', {str}),
'series': ('show_name', {str}, {lambda x: x or None}),
'series_id': ('catalog_id', {str}),
'duration': ('duration', {int_or_none}),
'release_timestamp': ('release_date_uts', {int_or_none}),
}),
}
def _call_api(self, path, display_id, note='Downloading JSON metadata', fatal=True, query={}):
return self._download_json(
f'{self._API_BASE}/{path}', display_id, note, fatal=fatal,
headers={'Accept': 'application/json'}, query={
'auth_token': self._AUTH_TOKEN,
'region': 'IN',
**query,
})
class DangalPlayIE(DangalPlayBaseIE):
IE_NAME = 'dangalplay'
_VALID_URL = r'https?://(?:www\.)?dangalplay.com/shows/(?P<series>[^/?#]+)/(?P<id>(?!episodes)[^/?#]+)/?(?:$|[?#])'
_TESTS = [{
'url': 'https://www.dangalplay.com/shows/kitani-mohabbat-hai-season-2/kitani-mohabbat-hai-season-2-ep-number-01',
'info_dict': {
'id': '647c61dc1e7171310dcd49b4',
'ext': 'mp4',
'release_timestamp': 1262304000,
'episode_number': 1,
'episode': 'EP 1 | KITANI MOHABBAT HAI SEASON 2',
'series': 'kitani-mohabbat-hai-season-2',
'season_number': 2,
'title': 'EP 1 | KITANI MOHABBAT HAI SEASON 2',
'release_date': '20100101',
'duration': 2325,
'season': 'Season 2',
'display_id': 'kitani-mohabbat-hai-season-2-ep-number-01',
'series_id': '645c9ea41e717158ca574966',
},
}, {
'url': 'https://www.dangalplay.com/shows/milke-bhi-hum-na-mile/milke-bhi-hum-na-mile-ep-number-01',
'info_dict': {
'id': '65d31d9ba73b9c3abd14a7f3',
'ext': 'mp4',
'episode': 'EP 1 | MILKE BHI HUM NA MILE',
'release_timestamp': 1708367411,
'episode_number': 1,
'season': 'Season 1',
'title': 'EP 1 | MILKE BHI HUM NA MILE',
'duration': 156048,
'release_date': '20240219',
'season_number': 1,
'series': 'MILKE BHI HUM NA MILE',
'series_id': '645c9ea41e717158ca574966',
'display_id': 'milke-bhi-hum-na-mile-ep-number-01',
},
}]
def _generate_api_data(self, data):
catalog_id = data['catalog_id']
content_id = data['content_id']
timestamp = str(int(time.time()))
unhashed = ''.join((catalog_id, content_id, self._OTV_USER_ID, timestamp, self._SECRET_KEY))
return json.dumps({
'catalog_id': catalog_id,
'content_id': content_id,
'category': '',
'region': 'IN',
'auth_token': self._AUTH_TOKEN,
'id': self._OTV_USER_ID,
'md5': hashlib.md5(unhashed.encode()).hexdigest(),
'ts': timestamp,
}, separators=(',', ':')).encode()
def _real_extract(self, url):
series_slug, episode_slug = self._match_valid_url(url).group('series', 'id')
metadata = self._call_api(
f'catalogs/shows/{series_slug}/episodes/{episode_slug}.gzip',
episode_slug, query={'item_language': ''})['data']
try:
details = self._download_json(
f'{self._API_BASE}/v2/users/get_all_details.gzip', episode_slug,
'Downloading playback details JSON', headers={
'Accept': 'application/json',
'Content-Type': 'application/json',
}, data=self._generate_api_data(metadata))['data']
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 422:
error_info = traverse_obj(e.cause.response.read().decode(), ({json.loads}, 'error', {dict})) or {}
if error_info.get('code') == '1016':
self.raise_login_required(
f'Your token has expired or is invalid. {self._LOGIN_HINT}', method=None)
elif msg := error_info.get('message'):
raise ExtractorError(msg)
raise
m3u8_url = traverse_obj(details, (
('adaptive_url', ('adaptive_urls', 'hd', 'hls', ..., 'playback_url')), {url_or_none}, any))
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, episode_slug, 'mp4')
return {
'formats': formats,
'subtitles': subtitles,
**self._extract_episode_info(metadata, episode_slug, series_slug),
}
class DangalPlaySeasonIE(DangalPlayBaseIE):
IE_NAME = 'dangalplay:season'
_VALID_URL = r'https?://(?:www\.)?dangalplay.com/shows/(?P<id>[^/?#]+)(?:/(?P<sub>ep-[^/?#]+)/episodes)?/?(?:$|[?#])'
_TESTS = [{
'url': 'https://www.dangalplay.com/shows/kitani-mohabbat-hai-season-1',
'playlist_mincount': 170,
'info_dict': {
'id': 'kitani-mohabbat-hai-season-1',
},
}, {
'url': 'https://www.dangalplay.com/shows/kitani-mohabbat-hai-season-1/ep-01-30-1/episodes',
'playlist_count': 30,
'info_dict': {
'id': 'kitani-mohabbat-hai-season-1-ep-01-30-1',
},
}, {
# 1 season only, series page is season page
'url': 'https://www.dangalplay.com/shows/milke-bhi-hum-na-mile',
'playlist_mincount': 15,
'info_dict': {
'id': 'milke-bhi-hum-na-mile',
},
}]
def _entries(self, subcategories, series_slug):
for subcategory in subcategories:
data = self._call_api(
f'catalogs/shows/items/{series_slug}/subcategories/{subcategory}/episodes.gzip',
series_slug, f'Downloading episodes JSON for {subcategory}', fatal=False, query={
'order_by': 'asc',
'status': 'published',
})
for ep in traverse_obj(data, ('data', 'items', lambda _, v: v['friendly_id'])):
episode_slug = ep['friendly_id']
yield self.url_result(
f'https://www.dangalplay.com/shows/{series_slug}/{episode_slug}',
DangalPlayIE, **self._extract_episode_info(ep, episode_slug, series_slug))
def _real_extract(self, url):
series_slug, subcategory = self._match_valid_url(url).group('id', 'sub')
subcategories = [subcategory] if subcategory else traverse_obj(
self._call_api(
f'catalogs/shows/items/{series_slug}.gzip', series_slug,
'Downloading season info JSON', query={'item_language': ''}),
('data', 'subcategories', ..., 'friendly_id', {str}))
return self.playlist_result(
self._entries(subcategories, series_slug), join_nonempty(series_slug, subcategory))
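
Note: for reference, the request signature built by _generate_api_data above boils down to an md5 over the concatenated IDs, timestamp and static secret (the ID values below are placeholders):

import hashlib
import time

catalog_id = '645c9ea41e717158ca574966'   # placeholder values
content_id = '647c61dc1e7171310dcd49b4'
otv_user_id = '0' * 32                    # the user's `otv_user_id`
secret_key = 'f53d31a4377e4ef31fa0'       # static key from the site JS
ts = str(int(time.time()))
signature = hashlib.md5(
    ''.join((catalog_id, content_id, otv_user_id, ts, secret_key)).encode()).hexdigest()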

View File

@@ -1,11 +1,11 @@
import re
import os.path
import re
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
url_basename,
remove_start,
url_basename,
)

View File

@@ -1,5 +1,4 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
parse_resolution,

View File

@@ -2,9 +2,9 @@ import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
determine_ext,
extract_attributes,
ExtractorError,
int_or_none,
parse_age_limit,
remove_end,

View File

@@ -2,10 +2,10 @@ import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
unified_strdate,
determine_ext,
int_or_none,
join_nonempty,
unified_strdate,
update_url_query,
)

View File

@@ -1,5 +1,5 @@
import time
import hashlib
import time
import urllib
import uuid

View File

@@ -4,8 +4,8 @@ import uuid
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
determine_ext,
ExtractorError,
determine_ext,
float_or_none,
int_or_none,
remove_start,
@@ -355,12 +355,10 @@ class DiscoveryPlusBaseIE(DPlayBaseIE):
video_id, headers=headers, data=json.dumps({
'deviceInfo': {
'adBlocker': False,
'drmSupported': False,
},
'videoId': video_id,
'wisteriaProperties': {
'platform': 'desktop',
'product': self._PRODUCT,
},
'wisteriaProperties': {},
}).encode('utf-8'))['data']['attributes']['streaming']
def _real_extract(self, url):
@@ -878,10 +876,31 @@ class DiscoveryPlusIndiaIE(DiscoveryPlusBaseIE):
})
class DiscoveryNetworksDeIE(DPlayBaseIE):
class DiscoveryNetworksDeIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)'
_TESTS = [{
'url': 'https://dmax.de/sendungen/goldrausch-in-australien/german-gold',
'info_dict': {
'id': '4756322',
'ext': 'mp4',
'title': 'German Gold',
'description': 'md5:f3073306553a8d9b40e6ac4cdbf09fc6',
'display_id': 'goldrausch-in-australien/german-gold',
'episode': 'Episode 1',
'episode_number': 1,
'season': 'Season 5',
'season_number': 5,
'series': 'Goldrausch in Australien',
'duration': 2648.0,
'upload_date': '20230517',
'timestamp': 1684357500,
'creators': ['DMAX'],
'thumbnail': 'https://eu1-prod-images.disco-api.com/2023/05/09/f72fb510-7992-3b12-af7f-f16a2c22d1e3.jpeg',
'tags': ['schatzsucher', 'schatz', 'nugget', 'bodenschätze', 'down under', 'australien', 'goldrausch'],
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100',
'info_dict': {
'id': '78867',
@@ -901,9 +920,7 @@ class DiscoveryNetworksDeIE(DPlayBaseIE):
'season_number': 1,
'thumbnail': r're:https://.+\.jpg',
},
'params': {
'skip_download': True,
},
'skip': '404 Not Found',
}, {
'url': 'https://www.dmax.de/programme/dmax-highlights/video/tuning-star-sidney-hoffmann-exklusiv-bei-dmax/191023082312316',
'only_matching': True,
@@ -920,8 +937,14 @@ class DiscoveryNetworksDeIE(DPlayBaseIE):
country = 'GB' if domain == 'dplay.co.uk' else 'DE'
realm = 'questuk' if country == 'GB' else domain.replace('.', '')
return self._get_disco_api_info(
url, '%s/%s' % (programme, alternate_id),
'sonic-eu1-prod.disco-api.com', realm, country)
url, f'{programme}/{alternate_id}', 'eu1-prod.disco-api.com', realm, country)
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
headers.update({
'x-disco-params': f'realm={realm}',
'x-disco-client': 'Alps:HyogaPlayer:0.0.0',
'Authorization': self._get_auth(disco_base, display_id, realm),
})
class DiscoveryPlusShowBaseIE(DPlayBaseIE):

View File

@@ -2,8 +2,8 @@ import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
NO_DEFAULT,
int_or_none,
parse_duration,
str_to_int,
)

View File

@@ -5,9 +5,9 @@ import urllib.parse
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
ExtractorError,
clean_html,
extract_attributes,
ExtractorError,
get_elements_by_class,
int_or_none,
js_to_json,

View File

@@ -2,15 +2,15 @@ import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
ExtractorError,
determine_ext,
int_or_none,
join_nonempty,
js_to_json,
mimetype2ext,
parse_iso8601,
try_get,
unescapeHTML,
parse_iso8601,
)

View File

@@ -1,10 +1,10 @@
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
int_or_none,
unified_strdate,
url_or_none,
)
from ..compat import compat_urlparse
class DWIE(InfoExtractor):

View File

@@ -4,15 +4,15 @@ import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
clean_html,
determine_ext,
ExtractorError,
dict_get,
int_or_none,
merge_dicts,
parse_qs,
parse_age_limit,
parse_iso8601,
parse_qs,
str_or_none,
try_get,
url_or_none,

View File

@@ -8,7 +8,7 @@ from ..utils import (
qualities,
traverse_obj,
unified_strdate,
xpath_text
xpath_text,
)

View File

@@ -1,8 +1,7 @@
from .common import InfoExtractor
from ..utils import (
parse_duration,
js_to_json,
parse_duration,
)

View File

@@ -1,8 +1,8 @@
from .common import InfoExtractor
from ..utils import (
xpath_text,
parse_duration,
ExtractorError,
parse_duration,
xpath_text,
)

View File

@@ -1,12 +1,6 @@
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
parse_iso8601,
ExtractorError,
try_get,
mimetype2ext
)
from ..utils import ExtractorError, mimetype2ext, parse_iso8601, try_get
class FancodeVodIE(InfoExtractor):

View File

@@ -3,9 +3,9 @@ import re
from .common import InfoExtractor
from ..compat import compat_etree_fromstring
from ..utils import (
int_or_none,
xpath_element,
xpath_text,
int_or_none,
)

View File

@@ -1,7 +1,7 @@
from .common import InfoExtractor
from ..utils import (
int_or_none,
float_or_none,
int_or_none,
)

View File

@@ -1,5 +1,4 @@
from .common import InfoExtractor
from ..utils import (
int_or_none,
traverse_obj,

View File

@@ -2,10 +2,10 @@ from .common import InfoExtractor
from ..compat import compat_str
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
int_or_none,
qualities,
strip_or_none,
int_or_none,
ExtractorError,
)

View File

@@ -7,7 +7,7 @@ from ..utils import (
parse_codecs,
parse_duration,
str_to_int,
unified_timestamp
unified_timestamp,
)

View File

@@ -10,7 +10,7 @@ from ..utils import (
int_or_none,
str_or_none,
traverse_obj,
try_get
try_get,
)

View File

@@ -1,4 +1,5 @@
import re
from .common import InfoExtractor
from ..utils import (
float_or_none,

yt_dlp/extractor/gbnews.py Normal file
View File

@@ -0,0 +1,107 @@
import functools
from .common import InfoExtractor
from ..utils import (
ExtractorError,
extract_attributes,
get_elements_html_by_class,
url_or_none,
)
from ..utils.traversal import traverse_obj
class GBNewsIE(InfoExtractor):
IE_DESC = 'GB News clips, features and live streams'
_VALID_URL = r'https?://(?:www\.)?gbnews\.(?:uk|com)/(?:\w+/)?(?P<id>[^#?]+)'
_PLATFORM = 'safari'
_SSMP_URL = 'https://mm-v2.simplestream.com/ssmp/api.php'
_TESTS = [{
'url': 'https://www.gbnews.com/news/bbc-claudine-gay-harvard-university-antisemitism-row',
'info_dict': {
'id': '52264136',
'ext': 'mp4',
'thumbnail': r're:https?://www\.gbnews\.\w+/.+\.(?:jpe?g|png|webp)',
'display_id': 'bbc-claudine-gay-harvard-university-antisemitism-row',
'description': 'The post was criticised by former employers of the broadcaster',
'title': 'BBC deletes post after furious backlash over headline downplaying antisemitism',
},
}, {
'url': 'https://www.gbnews.com/royal/prince-harry-in-love-with-kate-meghan-markle-jealous-royal',
'info_dict': {
'id': '52328390',
'ext': 'mp4',
'thumbnail': r're:https?://www\.gbnews\.\w+/.+\.(?:jpe?g|png|webp)',
'display_id': 'prince-harry-in-love-with-kate-meghan-markle-jealous-royal',
'description': 'Ingrid Seward has published 17 books documenting the highs and lows of the Royal Family',
'title': 'Royal author claims Prince Harry was \'in love\' with Kate - Meghan was \'jealous\'',
}
}, {
'url': 'https://www.gbnews.uk/watchlive',
'info_dict': {
'id': '1069',
'ext': 'mp4',
'thumbnail': r're:https?://www\.gbnews\.\w+/.+\.(?:jpe?g|png|webp)',
'display_id': 'watchlive',
'live_status': 'is_live',
'title': r're:^GB News Live',
},
'params': {'skip_download': 'm3u8'},
}]
@functools.lru_cache
def _get_ss_endpoint(self, data_id, data_env):
if not data_id:
data_id = 'GB003'
if not data_env:
data_env = 'production'
json_data = self._download_json(
self._SSMP_URL, None, 'Downloading Simplestream JSON metadata', query={
'id': data_id,
'env': data_env,
})
meta_url = traverse_obj(json_data, ('response', 'api_hostname', {url_or_none}))
if not meta_url:
raise ExtractorError('No API host found')
return meta_url
def _real_extract(self, url):
display_id = self._match_id(url).rpartition('/')[2]
webpage = self._download_webpage(url, display_id)
video_data = None
elements = get_elements_html_by_class('simplestream', webpage)
for html_tag in elements:
attributes = extract_attributes(html_tag)
if 'sidebar' not in (attributes.get('class') or ''):
video_data = attributes
if not video_data:
raise ExtractorError('Could not find video element', expected=True)
endpoint_url = self._get_ss_endpoint(video_data.get('data-id'), video_data.get('data-env'))
uvid = video_data['data-uvid']
video_type = video_data.get('data-type')
if not video_type or video_type == 'vod':
video_type = 'show'
stream_data = self._download_json(
f'{endpoint_url}/api/{video_type}/stream/{uvid}',
uvid, 'Downloading stream JSON', query={
'key': video_data.get('data-key'),
'platform': self._PLATFORM,
})
if traverse_obj(stream_data, 'drm'):
self.report_drm(uvid)
return {
'id': uvid,
'display_id': display_id,
'title': self._og_search_title(webpage, default=None),
'description': self._og_search_description(webpage, default=None),
'formats': self._extract_m3u8_formats(traverse_obj(stream_data, (
'response', 'stream', {url_or_none})), uvid, 'mp4'),
'thumbnail': self._og_search_thumbnail(webpage, default=None),
'is_live': video_type == 'live',
}
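
Note: the @functools.lru_cache on _get_ss_endpoint memoizes per argument tuple (including self), so the Simplestream API host is resolved over the network at most once per (data-id, data-env) pair. The bare-decorator form requires Python 3.8+; a sketch with a made-up host:

import functools

class Endpoints:
    @functools.lru_cache
    def resolve(self, data_id, data_env):
        print('network hit')  # runs once per distinct argument tuple
        return f'https://api.example.invalid/{data_id}/{data_env}'  # made-up host

e = Endpoints()
e.resolve('GB003', 'production')  # prints 'network hit'
e.resolve('GB003', 'production')  # served from the cache

One trade-off of caching on a method this way is that the cache holds a reference to the instance for the lifetime of the class.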

View File

@@ -4,7 +4,7 @@ import types
import urllib.parse
import xml.etree.ElementTree
from .common import InfoExtractor # isort: split
from .common import InfoExtractor
from .commonprotocols import RtmpIE
from .youtube import YoutubeIE
from ..compat import compat_etree_fromstring

View File

@@ -1,7 +1,7 @@
from .common import InfoExtractor
from ..utils import (
bool_or_none,
ExtractorError,
bool_or_none,
dict_get,
float_or_none,
int_or_none,

View File

@@ -1,5 +1,4 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
urlencode_postdata,

View File

@@ -3,9 +3,9 @@ import urllib.parse
from .common import InfoExtractor
from ..utils import (
ExtractorError,
determine_ext,
extract_attributes,
ExtractorError,
int_or_none,
parse_qs,
smuggle_url,

View File

@@ -3,16 +3,16 @@ import re
from .adobepass import AdobePassIE
from ..compat import compat_str
from ..utils import (
int_or_none,
determine_ext,
parse_age_limit,
remove_start,
remove_end,
try_get,
urlencode_postdata,
ExtractorError,
unified_timestamp,
determine_ext,
int_or_none,
parse_age_limit,
remove_end,
remove_start,
traverse_obj,
try_get,
unified_timestamp,
urlencode_postdata,
)

View File

@@ -4,7 +4,7 @@ from ..utils import (
determine_ext,
str_or_none,
unified_timestamp,
url_or_none
url_or_none,
)
from ..utils.traversal import traverse_obj

View File

@@ -1,10 +1,7 @@
import hashlib
from .common import InfoExtractor
from ..utils import (
ExtractorError,
try_get
)
from ..utils import ExtractorError, try_get
class GofileIE(InfoExtractor):

View File

@@ -1,11 +1,8 @@
import json
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
try_get,
url_or_none
)
import json
from ..utils import try_get, url_or_none
class GoToStageIE(InfoExtractor):

View File

@@ -2,11 +2,11 @@ import re
from .common import InfoExtractor
from ..utils import (
xpath_text,
xpath_element,
int_or_none,
parse_duration,
urljoin,
xpath_element,
xpath_text,
)

View File

@@ -1,7 +1,7 @@
from .common import InfoExtractor
from ..utils import (
determine_ext,
KNOWN_EXTENSIONS,
determine_ext,
str_to_int,
)

View File

@@ -1,8 +1,8 @@
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
clean_html,
ExtractorError,
clean_html,
int_or_none,
merge_dicts,
parse_count,

View File

@@ -4,8 +4,8 @@ from .common import InfoExtractor
from ..networking import Request
from ..networking.exceptions import HTTPError
from ..utils import (
clean_html,
ExtractorError,
clean_html,
int_or_none,
parse_age_limit,
try_get,

View File

@@ -2,8 +2,8 @@ import hashlib
import random
import re
from ..compat import compat_urlparse, compat_b64decode
from .common import InfoExtractor
from ..compat import compat_b64decode, compat_urlparse
from ..utils import (
ExtractorError,
int_or_none,
@@ -13,8 +13,6 @@ from ..utils import (
update_url_query,
)
from .common import InfoExtractor
class HuyaLiveIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?P<id>[^/#?&]+)(?:\D|$)'

View File

@@ -1,6 +1,6 @@
from .common import InfoExtractor
from ..utils import ExtractorError, str_or_none, traverse_obj, unified_strdate
from ..compat import compat_str
from ..utils import ExtractorError, str_or_none, traverse_obj, unified_strdate
class IchinanaLiveIE(InfoExtractor):

View File

@@ -1,3 +1,4 @@
from .bokecc import BokeCCBaseIE
from ..compat import (
compat_b64decode,
compat_urllib_parse_unquote,
@@ -6,10 +7,9 @@ from ..compat import (
from ..utils import (
ExtractorError,
determine_ext,
update_url_query,
traverse_obj,
update_url_query,
)
from .bokecc import BokeCCBaseIE
class InfoQIE(BokeCCBaseIE):

View File

@@ -3,12 +3,12 @@ import time
from .common import InfoExtractor
from ..utils import (
ExtractorError,
determine_ext,
js_to_json,
urlencode_postdata,
ExtractorError,
parse_qs,
traverse_obj
traverse_obj,
urlencode_postdata,
)

View File

@@ -4,20 +4,16 @@ import re
import time
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse_urlencode,
compat_urllib_parse_unquote
)
from .openload import PhantomJSwrapper
from ..compat import compat_str, compat_urllib_parse_unquote, compat_urllib_parse_urlencode
from ..utils import (
ExtractorError,
clean_html,
decode_packed_codes,
ExtractorError,
float_or_none,
format_field,
get_element_by_id,
get_element_by_attribute,
get_element_by_id,
int_or_none,
js_to_json,
ohdave_rsa_encrypt,

View File

@@ -1,12 +1,11 @@
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
str_or_none,
traverse_obj,
urljoin
urljoin,
)

View File

@@ -1,23 +1,22 @@
import json
from .common import InfoExtractor
from .brightcove import BrightcoveNewIE
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
JSON_LD_RE,
ExtractorError,
base_url,
clean_html,
determine_ext,
extract_attributes,
ExtractorError,
get_element_by_class,
JSON_LD_RE,
merge_dicts,
parse_duration,
smuggle_url,
try_get,
url_or_none,
url_basename,
url_or_none,
urljoin,
)

View File

@@ -1,9 +1,9 @@
import functools
import urllib.parse
import urllib.error
import hashlib
import json
import time
import urllib.error
import urllib.parse
from .common import InfoExtractor
from ..utils import (

View File

@@ -1,8 +1,8 @@
import hashlib
import random
from ..compat import compat_str
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
clean_html,
int_or_none,

View File

@@ -1,5 +1,6 @@
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
clean_html,
@@ -9,9 +10,8 @@ from ..utils import (
smuggle_url,
traverse_obj,
try_call,
unsmuggle_url
unsmuggle_url,
)
from .common import InfoExtractor
def _parse_japanese_date(text):

View File

@@ -0,0 +1,403 @@
import base64
import itertools
import json
import random
import re
import string
import time
from .common import InfoExtractor
from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
jwt_decode_hs256,
parse_age_limit,
try_call,
url_or_none,
)
from ..utils.traversal import traverse_obj
class JioCinemaBaseIE(InfoExtractor):
_NETRC_MACHINE = 'jiocinema'
_GEO_BYPASS = False
_ACCESS_TOKEN = None
_REFRESH_TOKEN = None
_GUEST_TOKEN = None
_USER_ID = None
_DEVICE_ID = None
_API_HEADERS = {'Origin': 'https://www.jiocinema.com', 'Referer': 'https://www.jiocinema.com/'}
_APP_NAME = {'appName': 'RJIL_JioCinema'}
_APP_VERSION = {'appVersion': '5.0.0'}
_API_SIGNATURES = 'o668nxgzwff'
_METADATA_API_BASE = 'https://content-jiovoot.voot.com/psapi'
_ACCESS_HINT = 'the `accessToken` from your browser local storage'
_LOGIN_HINT = (
'Log in with "-u phone -p <PHONE_NUMBER>" to authenticate with OTP, '
f'or use "-u token -p <ACCESS_TOKEN>" to log in with {_ACCESS_HINT}. '
'If you have previously logged in with yt-dlp and your session '
'has been cached, you can use "-u device -p <DEVICE_ID>"')
def _cache_token(self, token_type):
assert token_type in ('access', 'refresh', 'all')
if token_type in ('access', 'all'):
self.cache.store(
JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-access', JioCinemaBaseIE._ACCESS_TOKEN)
if token_type in ('refresh', 'all'):
self.cache.store(
JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-refresh', JioCinemaBaseIE._REFRESH_TOKEN)
def _call_api(self, url, video_id, note='Downloading API JSON', headers={}, data={}):
return self._download_json(
url, video_id, note, data=json.dumps(data, separators=(',', ':')).encode(), headers={
'Content-Type': 'application/json',
'Accept': 'application/json',
**self._API_HEADERS,
**headers,
}, expected_status=(400, 403, 474))
def _call_auth_api(self, service, endpoint, note, headers={}, data={}):
return self._call_api(
f'https://auth-jiocinema.voot.com/{service}service/apis/v4/{endpoint}',
None, note=note, headers=headers, data=data)
def _refresh_token(self):
if not JioCinemaBaseIE._REFRESH_TOKEN or not JioCinemaBaseIE._DEVICE_ID:
raise ExtractorError('User token has expired', expected=True)
response = self._call_auth_api(
'token', 'refreshtoken', 'Refreshing token',
headers={'accesstoken': self._ACCESS_TOKEN}, data={
**self._APP_NAME,
'deviceId': self._DEVICE_ID,
'refreshToken': self._REFRESH_TOKEN,
**self._APP_VERSION,
})
refresh_token = response.get('refreshTokenId')
if refresh_token and refresh_token != JioCinemaBaseIE._REFRESH_TOKEN:
JioCinemaBaseIE._REFRESH_TOKEN = refresh_token
self._cache_token('refresh')
JioCinemaBaseIE._ACCESS_TOKEN = response['authToken']
self._cache_token('access')
def _fetch_guest_token(self):
JioCinemaBaseIE._DEVICE_ID = ''.join(random.choices(string.digits, k=10))
guest_token = self._call_auth_api(
'token', 'guest', 'Downloading guest token', data={
**self._APP_NAME,
'deviceType': 'phone',
'os': 'ios',
'deviceId': self._DEVICE_ID,
'freshLaunch': False,
'adId': self._DEVICE_ID,
**self._APP_VERSION,
})
self._GUEST_TOKEN = guest_token['authToken']
self._USER_ID = guest_token['userId']
def _call_login_api(self, endpoint, guest_token, data, note):
return self._call_auth_api(
'user', f'loginotp/{endpoint}', note, headers={
**self.geo_verification_headers(),
'accesstoken': self._GUEST_TOKEN,
**self._APP_NAME,
**traverse_obj(guest_token, 'data', {
'deviceType': ('deviceType', {str}),
'os': ('os', {str}),
})}, data=data)
def _is_token_expired(self, token):
return (try_call(lambda: jwt_decode_hs256(token)['exp']) or 0) <= int(time.time() - 180)
def _perform_login(self, username, password):
if self._ACCESS_TOKEN and not self._is_token_expired(self._ACCESS_TOKEN):
return
UUID_RE = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
if username.lower() == 'token':
if try_call(lambda: jwt_decode_hs256(password)):
JioCinemaBaseIE._ACCESS_TOKEN = password
refresh_hint = 'the `refreshToken` UUID from your browser local storage'
refresh_token = self._configuration_arg('refresh_token', [''], ie_key=JioCinemaIE)[0]
if not refresh_token:
self.to_screen(
'To extend the life of your login session, in addition to your access token, '
'you can pass --extractor-args "jiocinema:refresh_token=REFRESH_TOKEN" '
f'where REFRESH_TOKEN is {refresh_hint}')
elif re.fullmatch(UUID_RE, refresh_token):
JioCinemaBaseIE._REFRESH_TOKEN = refresh_token
else:
self.report_warning(f'Invalid refresh_token value. Use {refresh_hint}')
else:
raise ExtractorError(
f'The password given could not be decoded as a token; use {self._ACCESS_HINT}', expected=True)
elif username.lower() == 'device' and re.fullmatch(rf'(?:{UUID_RE}|\d+)', password):
JioCinemaBaseIE._REFRESH_TOKEN = self.cache.load(JioCinemaBaseIE._NETRC_MACHINE, f'{password}-refresh')
JioCinemaBaseIE._ACCESS_TOKEN = self.cache.load(JioCinemaBaseIE._NETRC_MACHINE, f'{password}-access')
if not JioCinemaBaseIE._REFRESH_TOKEN or not JioCinemaBaseIE._ACCESS_TOKEN:
raise ExtractorError(f'Failed to load cached tokens for device ID "{password}"', expected=True)
elif username.lower() == 'phone' and re.fullmatch(r'\+?\d+', password):
self._fetch_guest_token()
guest_token = jwt_decode_hs256(self._GUEST_TOKEN)
initial_data = {
'number': base64.b64encode(password.encode()).decode(),
**self._APP_VERSION,
}
response = self._call_login_api('send', guest_token, initial_data, 'Requesting OTP')
if not traverse_obj(response, ('OTPInfo', {dict})):
raise ExtractorError('There was a problem with the phone number login attempt')
is_iphone = guest_token.get('os') == 'ios'
response = self._call_login_api('verify', guest_token, {
'deviceInfo': {
'consumptionDeviceName': 'iPhone' if is_iphone else 'Android',
'info': {
'platform': {'name': 'iPhone OS' if is_iphone else 'Android'},
'androidId': self._DEVICE_ID,
'type': 'iOS' if is_iphone else 'Android'
}
},
**initial_data,
'otp': self._get_tfa_info('the one-time password sent to your phone')
}, 'Submitting OTP')
if traverse_obj(response, 'code') == 1043:
raise ExtractorError('Wrong OTP', expected=True)
JioCinemaBaseIE._REFRESH_TOKEN = response['refreshToken']
JioCinemaBaseIE._ACCESS_TOKEN = response['authToken']
else:
raise ExtractorError(self._LOGIN_HINT, expected=True)
user_token = jwt_decode_hs256(JioCinemaBaseIE._ACCESS_TOKEN)['data']
JioCinemaBaseIE._USER_ID = user_token['userId']
JioCinemaBaseIE._DEVICE_ID = user_token['deviceId']
if JioCinemaBaseIE._REFRESH_TOKEN and username != 'device':
self._cache_token('all')
if self.get_param('cachedir') is not False:
self.to_screen(
f'NOTE: For subsequent logins you can use "-u device -p {JioCinemaBaseIE._DEVICE_ID}"')
elif not JioCinemaBaseIE._REFRESH_TOKEN:
JioCinemaBaseIE._REFRESH_TOKEN = self.cache.load(
JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-refresh')
if JioCinemaBaseIE._REFRESH_TOKEN:
self._cache_token('access')
self.to_screen(f'Logging in as device ID "{JioCinemaBaseIE._DEVICE_ID}"')
if self._is_token_expired(JioCinemaBaseIE._ACCESS_TOKEN):
self._refresh_token()
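
Note: _is_token_expired only needs the (unverified) JWT payload; jwt_decode_hs256 essentially base64-decodes the middle token segment. A minimal sketch of the equivalent logic, assuming standard three-part JWTs:

import base64
import json
import time

def jwt_payload(token):
    # decode the payload segment without verifying the signature,
    # restoring any stripped base64 padding
    part = token.split('.')[1]
    return json.loads(base64.urlsafe_b64decode(part + '=' * (-len(part) % 4)))

def is_token_expired(token, slack=180):
    # as above: treat the token as expired once its `exp` claim
    # is more than `slack` seconds in the past
    return jwt_payload(token).get('exp', 0) <= int(time.time() - slack)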
class JioCinemaIE(JioCinemaBaseIE):
IE_NAME = 'jiocinema'
_VALID_URL = r'https?://(?:www\.)?jiocinema\.com/?(?:movies?/[^/?#]+/|tv-shows/(?:[^/?#]+/){3})(?P<id>\d{3,})'
_TESTS = [{
'url': 'https://www.jiocinema.com/tv-shows/agnisakshi-ek-samjhauta/1/pradeep-to-stop-the-wedding/3759931',
'info_dict': {
'id': '3759931',
'ext': 'mp4',
'title': 'Pradeep to stop the wedding?',
'description': 'md5:75f72d1d1a66976633345a3de6d672b1',
'episode': 'Pradeep to stop the wedding?',
'episode_number': 89,
'season': 'Agnisakshi…Ek Samjhauta-S1',
'season_number': 1,
'series': 'Agnisakshi Ek Samjhauta',
'duration': 1238.0,
'thumbnail': r're:https?://.+\.jpg',
'age_limit': 13,
'season_id': '3698031',
'upload_date': '20230606',
'timestamp': 1686009600,
'release_date': '20230607',
'genres': ['Drama'],
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.jiocinema.com/movies/bhediya/3754021/watch',
'info_dict': {
'id': '3754021',
'ext': 'mp4',
'title': 'Bhediya',
'description': 'md5:a6bf2900371ac2fc3f1447401a9f7bb0',
'episode': 'Bhediya',
'duration': 8500.0,
'thumbnail': r're:https?://.+\.jpg',
'age_limit': 13,
'upload_date': '20230525',
'timestamp': 1685026200,
'release_date': '20230524',
'genres': ['Comedy'],
},
'params': {'skip_download': 'm3u8'},
}]
def _extract_formats_and_subtitles(self, playback, video_id):
m3u8_url = traverse_obj(playback, (
'data', 'playbackUrls', lambda _, v: v['streamtype'] == 'hls', 'url', {url_or_none}, any))
if not m3u8_url: # DRM-only content only serves dash urls
self.report_drm(video_id)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, m3u8_id='hls')
self._remove_duplicate_formats(formats)
return {
# '/_definst_/smil:vod/' m3u8 manifests claim to have 720p+ formats but max out at 480p
'formats': traverse_obj(formats, (
lambda _, v: '/_definst_/smil:vod/' not in v['url'] or v['height'] <= 480)),
'subtitles': subtitles,
}
def _real_extract(self, url):
video_id = self._match_id(url)
if not self._ACCESS_TOKEN and self._is_token_expired(self._GUEST_TOKEN):
self._fetch_guest_token()
elif self._ACCESS_TOKEN and self._is_token_expired(self._ACCESS_TOKEN):
self._refresh_token()
playback = self._call_api(
f'https://apis-jiovoot.voot.com/playbackjv/v3/{video_id}', video_id,
'Downloading playback JSON', headers={
**self.geo_verification_headers(),
'accesstoken': self._ACCESS_TOKEN or self._GUEST_TOKEN,
**self._APP_NAME,
'deviceid': self._DEVICE_ID,
'uniqueid': self._USER_ID,
'x-apisignatures': self._API_SIGNATURES,
'x-platform': 'androidweb',
'x-platform-token': 'web',
}, data={
'4k': False,
'ageGroup': '18+',
'appVersion': '3.4.0',
'bitrateProfile': 'xhdpi',
'capability': {
'drmCapability': {
'aesSupport': 'yes',
'fairPlayDrmSupport': 'none',
'playreadyDrmSupport': 'none',
'widevineDRMSupport': 'none'
},
'frameRateCapability': [{
'frameRateSupport': '30fps',
'videoQuality': '1440p'
}]
},
'continueWatchingRequired': False,
'dolby': False,
'downloadRequest': False,
'hevc': False,
'kidsSafe': False,
'manufacturer': 'Windows',
'model': 'Windows',
'multiAudioRequired': True,
'osVersion': '10',
'parentalPinValid': True,
'x-apisignatures': self._API_SIGNATURES
})
status_code = traverse_obj(playback, ('code', {int}))
if status_code == 474:
self.raise_geo_restricted(countries=['IN'])
elif status_code == 1008:
error_msg = 'This content is only available for premium users'
if self._ACCESS_TOKEN:
raise ExtractorError(error_msg, expected=True)
self.raise_login_required(f'{error_msg}. {self._LOGIN_HINT}', method=None)
elif status_code == 400:
raise ExtractorError('The requested content is not available', expected=True)
elif status_code is not None and status_code != 200:
raise ExtractorError(
f'JioCinema says: {traverse_obj(playback, ("message", {str})) or status_code}')
metadata = self._download_json(
f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/query/asset-details',
video_id, fatal=False, query={
'ids': f'include:{video_id}',
'responseType': 'common',
'devicePlatformType': 'desktop',
})
return {
'id': video_id,
'http_headers': self._API_HEADERS,
**self._extract_formats_and_subtitles(playback, video_id),
**traverse_obj(playback, ('data', {
# fallback metadata
'title': ('name', {str}),
'description': ('fullSynopsis', {str}),
'series': ('show', 'name', {str}, {lambda x: x or None}),
'season': ('tournamentName', {str}, {lambda x: x if x != 'Season 0' else None}),
'season_number': ('episode', 'season', {int_or_none}, {lambda x: x or None}),
'episode': ('fullTitle', {str}),
'episode_number': ('episode', 'episodeNo', {int_or_none}, {lambda x: x or None}),
'age_limit': ('ageNemonic', {parse_age_limit}),
'duration': ('totalDuration', {float_or_none}),
'thumbnail': ('images', {url_or_none}),
})),
**traverse_obj(metadata, ('result', 0, {
'title': ('fullTitle', {str}),
'description': ('fullSynopsis', {str}),
'series': ('showName', {str}, {lambda x: x or None}),
'season': ('seasonName', {str}, {lambda x: x or None}),
'season_number': ('season', {int_or_none}),
'season_id': ('seasonId', {str}, {lambda x: x or None}),
'episode': ('fullTitle', {str}),
'episode_number': ('episode', {int_or_none}),
'timestamp': ('uploadTime', {int_or_none}),
'release_date': ('telecastDate', {str}),
'age_limit': ('ageNemonic', {parse_age_limit}),
'duration': ('duration', {float_or_none}),
'genres': ('genres', ..., {str}),
'thumbnail': ('seo', 'ogImage', {url_or_none}),
})),
}
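
Note: the return value above relies on ** unpacking order for its fallback logic — the playback fields are merged first, then the asset-details metadata, and later keys overwrite earlier ones. In miniature:

fallback = {'title': 'from playback JSON', 'duration': 8500.0}
primary = {'title': 'Bhediya', 'genres': ['Comedy']}
info = {'id': '3754021', **fallback, **primary}
info['title']  # 'Bhediya' -- the later unpacking wins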
class JioCinemaSeriesIE(JioCinemaBaseIE):
IE_NAME = 'jiocinema:series'
_VALID_URL = r'https?://(?:www\.)?jiocinema\.com/tv-shows/(?P<slug>[\w-]+)/(?P<id>\d{3,})'
_TESTS = [{
'url': 'https://www.jiocinema.com/tv-shows/naagin/3499917',
'info_dict': {
'id': '3499917',
'title': 'naagin',
},
'playlist_mincount': 120,
}]
def _entries(self, series_id):
seasons = self._download_json(
f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/season-by-show', series_id,
'Downloading series metadata JSON', query={
'sort': 'season:asc',
'id': series_id,
'responseType': 'common',
})
for season_num, season in enumerate(traverse_obj(seasons, ('result', lambda _, v: v['id'])), 1):
season_id = season['id']
label = season.get('season') or season_num
for page_num in itertools.count(1):
episodes = traverse_obj(self._download_json(
f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/series-wise-episode',
season_id, f'Downloading season {label} page {page_num} JSON', query={
'sort': 'episode:asc',
'id': season_id,
'responseType': 'common',
'page': page_num,
}), ('result', lambda _, v: v['id'] and url_or_none(v['slug'])))
if not episodes:
break
for episode in episodes:
yield self.url_result(
episode['slug'], JioCinemaIE, **traverse_obj(episode, {
'video_id': 'id',
'video_title': ('fullTitle', {str}),
'season_number': ('season', {int_or_none}),
'episode_number': ('episode', {int_or_none}),
}))
def _real_extract(self, url):
slug, series_id = self._match_valid_url(url).group('slug', 'id')
return self.playlist_result(self._entries(series_id), series_id, slug)

View File

@@ -1,8 +1,5 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
unified_strdate
)
from ..utils import ExtractorError, unified_strdate
class JoveIE(InfoExtractor):

View File

@@ -1,6 +1,6 @@
import base64
import re
import json
import re
from .common import InfoExtractor
from ..utils import (

View File

@@ -3,8 +3,8 @@ from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
int_or_none,
strip_or_none,
str_or_none,
strip_or_none,
traverse_obj,
unified_timestamp,
)

View File

@@ -4,18 +4,18 @@ import re
from .common import InfoExtractor
from ..compat import (
compat_urlparse,
compat_parse_qs,
compat_urlparse,
)
from ..utils import (
clean_html,
ExtractorError,
clean_html,
format_field,
int_or_none,
unsmuggle_url,
remove_start,
smuggle_url,
traverse_obj,
remove_start
unsmuggle_url,
)

View File

@@ -1,7 +1,7 @@
import time
import hashlib
import random
import string
import hashlib
import time
import urllib.parse
from .common import InfoExtractor

View File

@@ -3,10 +3,10 @@ import re
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
get_element_by_id,
clean_html,
ExtractorError,
InAdvancePagedList,
clean_html,
get_element_by_id,
remove_start,
)

View File

@@ -1,9 +1,25 @@
from .common import InfoExtractor
from .wat import WatIE
from ..utils import ExtractorError, int_or_none
from ..utils.traversal import traverse_obj
class LCIIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:lci|tf1info)\.fr/[^/]+/[\w-]+-(?P<id>\d+)\.html'
_VALID_URL = r'https?://(?:www\.)?(?:lci|tf1info)\.fr/(?:[^/?#]+/)+[\w-]+-(?P<id>\d+)\.html'
_TESTS = [{
'url': 'https://www.tf1info.fr/replay-lci/videos/video-24h-pujadas-du-vendredi-24-mai-6708-2300831.html',
'info_dict': {
'id': '14113788',
'ext': 'mp4',
'title': '24H Pujadas du vendredi 24 mai 2024',
'thumbnail': 'https://photos.tf1.fr/1280/720/24h-pujadas-du-24-mai-2024-55bf2d-0@1x.jpg',
'upload_date': '20240524',
'duration': 6158,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://www.tf1info.fr/politique/election-presidentielle-2022-second-tour-j-2-marine-le-pen-et-emmanuel-macron-en-interview-de-lci-vendredi-soir-2217486.html',
'info_dict': {
'id': '13875948',
@@ -24,5 +40,10 @@ class LCIIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
wat_id = self._search_regex(r'watId["\']?\s*:\s*["\']?(\d+)', webpage, 'wat id')
return self.url_result('wat:' + wat_id, 'Wat', wat_id)
next_data = self._search_nextjs_data(webpage, video_id)
wat_id = traverse_obj(next_data, (
'props', 'pageProps', 'page', 'tms', 'videos', {dict.keys}, ..., {int_or_none}, any))
if wat_id is None:
raise ExtractorError('Could not find wat_id')
return self.url_result(f'wat:{wat_id}', WatIE, str(wat_id))
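
Note: the new LCI flow reads the wat ID out of Next.js page data instead of a page regex — {dict.keys} turns the videos mapping into its keys, `...` branches over them, {int_or_none} coerces, and `any` yields the first hit. With a stubbed-down next_data:

from yt_dlp.utils import int_or_none, traverse_obj

next_data = {'props': {'pageProps': {'page': {'tms': {'videos': {'14113788': {}}}}}}}
traverse_obj(next_data, (
    'props', 'pageProps', 'page', 'tms', 'videos',
    {dict.keys}, ..., {int_or_none}, any))
# -> 14113788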

View File

@@ -1,5 +1,5 @@
from .common import InfoExtractor
from .arkena import ArkenaIE
from .common import InfoExtractor
class LcpPlayIE(ArkenaIE): # XXX: Do not subclass from concrete IE

View File

@@ -4,8 +4,8 @@ from .common import InfoExtractor
from ..utils import (
determine_ext,
determine_protocol,
parse_duration,
int_or_none,
parse_duration,
)

View File

@@ -2,9 +2,9 @@ import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
clean_html,
determine_ext,
ExtractorError,
float_or_none,
int_or_none,
str_or_none,

View File

@@ -11,9 +11,9 @@ from ..compat import (
compat_urllib_parse_urlencode,
)
from ..utils import (
ExtractorError,
determine_ext,
encode_data_uri,
ExtractorError,
int_or_none,
orderedSet,
parse_iso8601,

View File

@@ -1,7 +1,6 @@
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
float_or_none,

Some files were not shown because too many files have changed in this diff