mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-06-27 17:08:32 +00:00
Merge 3f9542f4e8
into 06c1a8cdff
This commit is contained in:
commit
cb6c7b28c2
@ -1947,6 +1947,34 @@ def test_search_nextjs_data(self):
|
|||||||
with self.assertWarns(DeprecationWarning):
|
with self.assertWarns(DeprecationWarning):
|
||||||
self.assertEqual(self.ie._search_nextjs_data('', None, default='{}'), {})
|
self.assertEqual(self.ie._search_nextjs_data('', None, default='{}'), {})
|
||||||
|
|
||||||
|
def test_search_nextjs_v13_data(self):
    # Each entry is one inline <script> tag as emitted by the Next.js app router.
    # Only payload types 1 (flight data) and 3 (base64-encoded flight data) are
    # expected to contribute to the result; types 0 and 2 must be ignored.
    script_tags = (
        R'<script>(self.__next_f=self.__next_f||[]).push([0])</script>',
        R'<script>self.__next_f.push([2,"0:[\"$\",\"$L0\",null,{\"do_not_add_this\":\"fail\"}]\n"])</script>',
        R'<script>self.__next_f.push([1,"1:I[46975,[],\"HTTPAccessFallbackBoundary\"]\n2:I[32630,[\"8183\",\"static/chunks/8183-768193f6a9e33cdd.js\"]]\n"])</script>',
        R'<script nonce="abc123">self.__next_f.push([1,"e:[false,[\"$\",\"div\",null,{\"children\":[\"$\",\"$L18\",null,{\"foo\":\"bar\"}]}],false]\n"])</script>',
        R'<script>self.__next_f.push([1,"2a:[[\"$\",\"div\",null,{\"className\":\"flex flex-col\",\"children\":[]}],[\"$\",\"$L16\",null,{\"meta\":{\"dateCreated\":1730489700,\"uuid\":\"40cac41d-8d29-4ef5-aa11-75047b9f0907\"}}]]\n"])</script>',
        R'<script>self.__next_f.push([1,"df:[\"$undefined\",[\"$\",\"div\",null,{\"children\":[\"$\",\"$L17\",null,{}],\"do_not_include_this_field\":\"fail\"}],[\"$\",\"div\",null,{\"children\":[[\"$\",\"$L19\",null,{\"duplicated_field_name\":{\"x\":1}}],[\"$\",\"$L20\",null,{\"duplicated_field_name\":{\"y\":2}}]]}],\"$undefined\"]\n"])</script>',
        R'<script>self.__next_f.push([3,"MzM6WyIkIiwiJEwzMiIsbnVsbCx7ImRlY29kZWQiOiJzdWNjZXNzIn1d"])</script>',
    )
    # One tag per line, with a leading and a trailing newline around the markup
    webpage = '\n'.join(('', *script_tags, ''))

    # Hydration props are expected in document order, one dict per component
    expected_props = [
        {'foo': 'bar'},
        {'meta': {
            'dateCreated': 1730489700,
            'uuid': '40cac41d-8d29-4ef5-aa11-75047b9f0907',
        }},
        {'duplicated_field_name': {'x': 1}},
        {'duplicated_field_name': {'y': 2}},
        {'decoded': 'success'},
    ]
    self.assertEqual(self.ie._search_nextjs_v13_data(webpage, None), expected_props)

    # Non-fatal lookups on an empty or missing webpage yield an empty list
    for blank_page in ('', None):
        self.assertEqual(self.ie._search_nextjs_v13_data(blank_page, None, fatal=False), [])
|
||||||
|
|
||||||
def test_search_nuxt_json(self):
|
def test_search_nuxt_json(self):
|
||||||
HTML_TMPL = '<script data-ssr="true" id="__NUXT_DATA__" type="application/json">[{}]</script>'
|
HTML_TMPL = '<script data-ssr="true" id="__NUXT_DATA__" type="application/json">[{}]</script>'
|
||||||
VALID_DATA = '''
|
VALID_DATA = '''
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
import base64
|
import base64
|
||||||
|
import binascii
|
||||||
import collections
|
import collections
|
||||||
import functools
|
import functools
|
||||||
import getpass
|
import getpass
|
||||||
@ -1782,6 +1783,59 @@ def _search_nextjs_data(self, webpage, video_id, *, fatal=True, default=NO_DEFAU
|
|||||||
r'<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>', webpage, 'next.js data',
|
r'<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>', webpage, 'next.js data',
|
||||||
video_id, end_pattern='</script>', fatal=fatal, default=default, **kw)
|
video_id, end_pattern='</script>', fatal=fatal, default=default, **kw)
|
||||||
|
|
||||||
|
def _search_nextjs_v13_data(self, webpage, video_id, fatal=True):
|
||||||
|
"""Parses Next.js app router flight data that was introduced in Next.js v13"""
|
||||||
|
nextjs_data = []
|
||||||
|
if not fatal and not isinstance(webpage, str):
|
||||||
|
return nextjs_data
|
||||||
|
|
||||||
|
def flatten(flight_data):
|
||||||
|
if not isinstance(flight_data, list):
|
||||||
|
return
|
||||||
|
if len(flight_data) == 4 and flight_data[0] == '$':
|
||||||
|
_, name, _, data = flight_data
|
||||||
|
if not isinstance(data, dict):
|
||||||
|
return
|
||||||
|
children = data.pop('children', None)
|
||||||
|
if data and isinstance(name, str) and name[:1] == '$':
|
||||||
|
# It is useful hydration JSON data
|
||||||
|
nextjs_data.append(data)
|
||||||
|
flatten(children)
|
||||||
|
return
|
||||||
|
for f in flight_data:
|
||||||
|
flatten(f)
|
||||||
|
|
||||||
|
# The flight segments regex pattern can afford to be (and should be) strict
|
||||||
|
# Ref: https://github.com/vercel/next.js/commit/5a4a08fdce91a038f2ed3a70568d3ed040403150
|
||||||
|
# /packages/next/src/server/app-render/use-flight-response.tsx
|
||||||
|
for flight_segment in re.findall(r'<script[^>]*>self\.__next_f\.push\((\[.+?\])\)</script>', webpage):
|
||||||
|
segment = self._parse_json(flight_segment, video_id, fatal=fatal, errnote=None if fatal else False)
|
||||||
|
# Some earlier versions of next.js "optimized" away this array structure; this is unsupported
|
||||||
|
# Ref: https://github.com/vercel/next.js/commit/0123a9d5c9a9a77a86f135b7ae30b46ca986d761
|
||||||
|
if not isinstance(segment, list) or len(segment) != 2:
|
||||||
|
self.write_debug(
|
||||||
|
f'{video_id}: Unsupported next.js flight data structure detected', only_once=True)
|
||||||
|
continue
|
||||||
|
payload_type, chunk = segment
|
||||||
|
if payload_type == 3:
|
||||||
|
try:
|
||||||
|
chunk = base64.b64decode(chunk).decode()
|
||||||
|
except (ValueError, binascii.Error):
|
||||||
|
msg = 'Unable to parse next.js data: unable to decode flight data'
|
||||||
|
if not fatal:
|
||||||
|
self.report_warning(msg, video_id=video_id, only_once=True)
|
||||||
|
continue
|
||||||
|
raise ExtractorError(msg)
|
||||||
|
elif payload_type != 1:
|
||||||
|
# Ignore useless payload types (0: bootstrap, 2: form state)
|
||||||
|
continue
|
||||||
|
# Not all chunks are complete JSON data; this should always be non-fatal
|
||||||
|
flatten(self._search_json(
|
||||||
|
r'^[\da-f]+:', chunk, 'flight data', video_id,
|
||||||
|
default=None, contains_pattern=r'\[.+\]'))
|
||||||
|
|
||||||
|
return nextjs_data
|
||||||
|
|
||||||
def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
|
def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
|
||||||
"""Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
|
"""Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
|
||||||
rectx = re.escape(context_name)
|
rectx = re.escape(context_name)
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
import json
|
|
||||||
import re
|
import re
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
@ -19,7 +18,11 @@
|
|||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
)
|
)
|
||||||
from ..utils.traversal import find_element, traverse_obj
|
from ..utils.traversal import (
|
||||||
|
find_element,
|
||||||
|
get_first,
|
||||||
|
traverse_obj,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class FranceTVBaseInfoExtractor(InfoExtractor):
|
class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||||
@ -258,7 +261,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
|||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
|
'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1', # old: c5bda21d-2c6f-4470-8849-3d8327adb2ba'
|
'id': 'b2cf9fd8-e971-4757-8651-848f2772df61', # old: ec217ecc-0733-48cf-ac06-af1347b849d1
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '13h15, le dimanche... - Les mystères de Jésus',
|
'title': '13h15, le dimanche... - Les mystères de Jésus',
|
||||||
'timestamp': 1502623500,
|
'timestamp': 1502623500,
|
||||||
@ -269,7 +272,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'add_ie': [FranceTVIE.ie_key()],
|
'skip': 'Unfortunately, this video is no longer available',
|
||||||
}, {
|
}, {
|
||||||
# geo-restricted
|
# geo-restricted
|
||||||
'url': 'https://www.france.tv/enfants/six-huit-ans/foot2rue/saison-1/3066387-duel-au-vieux-port.html',
|
'url': 'https://www.france.tv/enfants/six-huit-ans/foot2rue/saison-1/3066387-duel-au-vieux-port.html',
|
||||||
@ -287,7 +290,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
|||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'duration': 1441,
|
'duration': 1441,
|
||||||
},
|
},
|
||||||
'skip': 'No longer available',
|
'skip': 'Unfortunately, this video is no longer available',
|
||||||
}, {
|
}, {
|
||||||
# geo-restricted livestream (workflow == 'token-akamai')
|
# geo-restricted livestream (workflow == 'token-akamai')
|
||||||
'url': 'https://www.france.tv/france-4/direct.html',
|
'url': 'https://www.france.tv/france-4/direct.html',
|
||||||
@ -308,6 +311,19 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
|||||||
'live_status': 'is_live',
|
'live_status': 'is_live',
|
||||||
},
|
},
|
||||||
'params': {'skip_download': 'livestream'},
|
'params': {'skip_download': 'livestream'},
|
||||||
|
}, {
|
||||||
|
# Not geo-restricted
|
||||||
|
'url': 'https://www.france.tv/france-2/la-maison-des-maternelles/5574051-nous-sommes-amis-et-nous-avons-fait-un-enfant-ensemble.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'b448bfe4-9fe7-11ee-97d8-2ba3426fa3df',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Nous sommes amis et nous avons fait un enfant ensemble - Émission du jeudi 21 décembre 2023',
|
||||||
|
'duration': 1065,
|
||||||
|
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||||
|
'timestamp': 1703147921,
|
||||||
|
'upload_date': '20231221',
|
||||||
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
}, {
|
}, {
|
||||||
# france3
|
# france3
|
||||||
'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html',
|
'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html',
|
||||||
@ -342,30 +358,16 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
# XXX: For parsing next.js v15+ data; see also yt_dlp.extractor.goplay
|
|
||||||
def _find_json(self, s):
|
|
||||||
return self._search_json(
|
|
||||||
r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None)
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
nextjs_data = self._search_nextjs_v13_data(webpage, display_id)
|
||||||
|
|
||||||
nextjs_data = traverse_obj(
|
if get_first(nextjs_data, ('isLive', {bool})):
|
||||||
re.findall(r'<script[^>]*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*</script>', webpage),
|
|
||||||
(..., {json.loads}, ..., {self._find_json}, ..., 'children', ..., ..., 'children', ..., ..., 'children'))
|
|
||||||
|
|
||||||
if traverse_obj(nextjs_data, (..., ..., 'children', ..., 'isLive', {bool}, any)):
|
|
||||||
# For livestreams we need the id of the stream instead of the currently airing episode id
|
# For livestreams we need the id of the stream instead of the currently airing episode id
|
||||||
video_id = traverse_obj(nextjs_data, (
|
video_id = get_first(nextjs_data, ('options', 'id', {str}))
|
||||||
..., ..., 'children', ..., 'children', ..., 'children', ..., 'children', ..., ...,
|
|
||||||
'children', ..., ..., 'children', ..., ..., 'children', (..., (..., ...)),
|
|
||||||
'options', 'id', {str}, any))
|
|
||||||
else:
|
else:
|
||||||
video_id = traverse_obj(nextjs_data, (
|
video_id = get_first(nextjs_data, ('video', ('playerReplayId', 'siId'), {str}))
|
||||||
..., ..., ..., 'children',
|
|
||||||
lambda _, v: v['video']['url'] == urllib.parse.urlparse(url).path,
|
|
||||||
'video', ('playerReplayId', 'siId'), {str}, any))
|
|
||||||
|
|
||||||
if not video_id:
|
if not video_id:
|
||||||
raise ExtractorError('Unable to extract video ID')
|
raise ExtractorError('Unable to extract video ID')
|
||||||
|
@ -5,16 +5,11 @@
|
|||||||
import hmac
|
import hmac
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import ExtractorError, int_or_none
|
||||||
ExtractorError,
|
from ..utils.traversal import get_first, traverse_obj
|
||||||
int_or_none,
|
|
||||||
remove_end,
|
|
||||||
traverse_obj,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class GoPlayIE(InfoExtractor):
|
class GoPlayIE(InfoExtractor):
|
||||||
@ -27,10 +22,10 @@ class GoPlayIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2baa4560-87a0-421b-bffc-359914e3c387',
|
'id': '2baa4560-87a0-421b-bffc-359914e3c387',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'S22 - Aflevering 1',
|
'title': 'De Slimste Mens ter Wereld - S22 - Aflevering 1',
|
||||||
'description': r're:In aflevering 1 nemen Daan Alferink, Tess Elst en Xander De Rycke .{66}',
|
'description': r're:In aflevering 1 nemen Daan Alferink, Tess Elst en Xander De Rycke .{66}',
|
||||||
'series': 'De Slimste Mens ter Wereld',
|
'series': 'De Slimste Mens ter Wereld',
|
||||||
'episode': 'Episode 1',
|
'episode': 'Wordt aangekondigd',
|
||||||
'season_number': 22,
|
'season_number': 22,
|
||||||
'episode_number': 1,
|
'episode_number': 1,
|
||||||
'season': 'Season 22',
|
'season': 'Season 22',
|
||||||
@ -52,7 +47,7 @@ class GoPlayIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'ecb79672-92b9-4cd9-a0d7-e2f0250681ee',
|
'id': 'ecb79672-92b9-4cd9-a0d7-e2f0250681ee',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'S11 - Aflevering 1',
|
'title': 'De Mol - S11 - Aflevering 1',
|
||||||
'description': r're:Tien kandidaten beginnen aan hun verovering van Amerika en ontmoeten .{102}',
|
'description': r're:Tien kandidaten beginnen aan hun verovering van Amerika en ontmoeten .{102}',
|
||||||
'episode': 'Episode 1',
|
'episode': 'Episode 1',
|
||||||
'series': 'De Mol',
|
'series': 'De Mol',
|
||||||
@ -75,21 +70,13 @@ def _real_initialize(self):
|
|||||||
if not self._id_token:
|
if not self._id_token:
|
||||||
raise self.raise_login_required(method='password')
|
raise self.raise_login_required(method='password')
|
||||||
|
|
||||||
# XXX: For parsing next.js v15+ data; see also yt_dlp.extractor.francetv
|
|
||||||
def _find_json(self, s):
|
|
||||||
return self._search_json(
|
|
||||||
r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None)
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
nextjs_data = traverse_obj(
|
nextjs_data = self._search_nextjs_v13_data(webpage, display_id)
|
||||||
re.findall(r'<script[^>]*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*</script>', webpage),
|
meta = get_first(nextjs_data, (
|
||||||
(..., {json.loads}, ..., {self._find_json}, ...))
|
lambda k, v: k in ('video', 'meta') and v['path'] == urllib.parse.urlparse(url).path))
|
||||||
meta = traverse_obj(nextjs_data, (
|
|
||||||
..., ..., 'children', ..., ..., 'children',
|
|
||||||
lambda _, v: v['video']['path'] == urllib.parse.urlparse(url).path, 'video', any))
|
|
||||||
|
|
||||||
video_id = meta['uuid']
|
video_id = meta['uuid']
|
||||||
info_dict = traverse_obj(meta, {
|
info_dict = traverse_obj(meta, {
|
||||||
@ -98,19 +85,18 @@ def _real_extract(self, url):
|
|||||||
})
|
})
|
||||||
|
|
||||||
if traverse_obj(meta, ('program', 'subtype')) != 'movie':
|
if traverse_obj(meta, ('program', 'subtype')) != 'movie':
|
||||||
for season_data in traverse_obj(nextjs_data, (..., 'children', ..., 'playlists', ...)):
|
for season_data in traverse_obj(nextjs_data, (..., 'playlists', ..., {dict})):
|
||||||
episode_data = traverse_obj(
|
episode_data = traverse_obj(season_data, ('videos', lambda _, v: v['videoId'] == video_id, any))
|
||||||
season_data, ('videos', lambda _, v: v['videoId'] == video_id, any))
|
|
||||||
if not episode_data:
|
if not episode_data:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
episode_title = traverse_obj(
|
season_number = traverse_obj(season_data, ('season', {int_or_none}))
|
||||||
episode_data, 'contextualTitle', 'episodeTitle', expected_type=str)
|
|
||||||
info_dict.update({
|
info_dict.update({
|
||||||
'title': episode_title or info_dict.get('title'),
|
'episode': traverse_obj(episode_data, ('episodeTitle', {str})),
|
||||||
'series': remove_end(info_dict.get('title'), f' - {episode_title}'),
|
|
||||||
'season_number': traverse_obj(season_data, ('season', {int_or_none})),
|
|
||||||
'episode_number': traverse_obj(episode_data, ('episodeNumber', {int_or_none})),
|
'episode_number': traverse_obj(episode_data, ('episodeNumber', {int_or_none})),
|
||||||
|
'season_number': season_number,
|
||||||
|
'series': self._search_regex(
|
||||||
|
fr'^(.+)? - S{season_number} - ', info_dict.get('title'), 'series', default=None),
|
||||||
})
|
})
|
||||||
break
|
break
|
||||||
|
|
||||||
|
@ -1,6 +1,3 @@
|
|||||||
import json
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .brightcove import BrightcoveNewIE
|
from .brightcove import BrightcoveNewIE
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -11,7 +8,12 @@
|
|||||||
str_or_none,
|
str_or_none,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
)
|
)
|
||||||
from ..utils.traversal import require, traverse_obj, value
|
from ..utils.traversal import (
|
||||||
|
get_first,
|
||||||
|
require,
|
||||||
|
traverse_obj,
|
||||||
|
value,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class NineNowIE(InfoExtractor):
|
class NineNowIE(InfoExtractor):
|
||||||
@ -101,20 +103,11 @@ class NineNowIE(InfoExtractor):
|
|||||||
}]
|
}]
|
||||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4460760524001/default_default/index.html?videoId={}'
|
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4460760524001/default_default/index.html?videoId={}'
|
||||||
|
|
||||||
# XXX: For parsing next.js v15+ data; see also yt_dlp.extractor.francetv and yt_dlp.extractor.goplay
|
|
||||||
def _find_json(self, s):
|
|
||||||
return self._search_json(
|
|
||||||
r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None)
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id, video_type = self._match_valid_url(url).group('id', 'type')
|
display_id, video_type = self._match_valid_url(url).group('id', 'type')
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
common_data = traverse_obj(
|
common_data = get_first(self._search_nextjs_v13_data(webpage, display_id), ('payload', {dict}))
|
||||||
re.findall(r'<script[^>]*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*</script>', webpage),
|
|
||||||
(..., {json.loads}, ..., {self._find_json},
|
|
||||||
lambda _, v: v['payload'][video_type]['slug'] == display_id,
|
|
||||||
'payload', any, {require('video data')}))
|
|
||||||
|
|
||||||
if traverse_obj(common_data, (video_type, 'video', 'drm', {bool})):
|
if traverse_obj(common_data, (video_type, 'video', 'drm', {bool})):
|
||||||
self.report_drm(display_id)
|
self.report_drm(display_id)
|
||||||
|
Loading…
Reference in New Issue
Block a user