mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-12-21 23:48:57 +00:00
Merge branch 'yt-dlp:master' into threads
This commit is contained in:
@@ -76,6 +76,7 @@ from .aenetworks import (
|
||||
)
|
||||
from .aeonco import AeonCoIE
|
||||
from .afreecatv import (
|
||||
AfreecaTVCatchStoryIE,
|
||||
AfreecaTVIE,
|
||||
AfreecaTVLiveIE,
|
||||
AfreecaTVUserIE,
|
||||
@@ -503,7 +504,6 @@ from .dhm import DHMIE
|
||||
from .digitalconcerthall import DigitalConcertHallIE
|
||||
from .digiteka import DigitekaIE
|
||||
from .discogs import DiscogsReleasePlaylistIE
|
||||
from .discovery import DiscoveryIE
|
||||
from .disney import DisneyIE
|
||||
from .dispeak import DigitallySpeakingIE
|
||||
from .dlf import (
|
||||
@@ -531,16 +531,12 @@ from .dplay import (
|
||||
DiscoveryPlusIndiaShowIE,
|
||||
DiscoveryPlusItalyIE,
|
||||
DiscoveryPlusItalyShowIE,
|
||||
DIYNetworkIE,
|
||||
DPlayIE,
|
||||
FoodNetworkIE,
|
||||
GlobalCyclingNetworkPlusIE,
|
||||
GoDiscoveryIE,
|
||||
HGTVDeIE,
|
||||
HGTVUsaIE,
|
||||
InvestigationDiscoveryIE,
|
||||
MotorTrendIE,
|
||||
MotorTrendOnDemandIE,
|
||||
ScienceChannelIE,
|
||||
TravelChannelIE,
|
||||
)
|
||||
@@ -779,6 +775,7 @@ from .gopro import GoProIE
|
||||
from .goshgay import GoshgayIE
|
||||
from .gotostage import GoToStageIE
|
||||
from .gputechconf import GPUTechConfIE
|
||||
from .graspop import GraspopIE
|
||||
from .gronkh import (
|
||||
GronkhFeedIE,
|
||||
GronkhIE,
|
||||
@@ -969,6 +966,10 @@ from .la7 import (
|
||||
LA7PodcastEpisodeIE,
|
||||
LA7PodcastIE,
|
||||
)
|
||||
from .laracasts import (
|
||||
LaracastsIE,
|
||||
LaracastsPlaylistIE,
|
||||
)
|
||||
from .lastfm import (
|
||||
LastFMIE,
|
||||
LastFMPlaylistIE,
|
||||
@@ -1113,12 +1114,15 @@ from .meipai import MeipaiIE
|
||||
from .melonvod import MelonVODIE
|
||||
from .metacritic import MetacriticIE
|
||||
from .mgtv import MGTVIE
|
||||
from .microsoftembed import MicrosoftEmbedIE
|
||||
from .microsoftstream import MicrosoftStreamIE
|
||||
from .microsoftvirtualacademy import (
|
||||
MicrosoftVirtualAcademyCourseIE,
|
||||
MicrosoftVirtualAcademyIE,
|
||||
from .microsoftembed import (
|
||||
MicrosoftBuildIE,
|
||||
MicrosoftEmbedIE,
|
||||
MicrosoftLearnEpisodeIE,
|
||||
MicrosoftLearnPlaylistIE,
|
||||
MicrosoftLearnSessionIE,
|
||||
MicrosoftMediusIE,
|
||||
)
|
||||
from .microsoftstream import MicrosoftStreamIE
|
||||
from .mildom import (
|
||||
MildomClipIE,
|
||||
MildomIE,
|
||||
@@ -1603,6 +1607,7 @@ from .qqmusic import (
|
||||
QQMusicPlaylistIE,
|
||||
QQMusicSingerIE,
|
||||
QQMusicToplistIE,
|
||||
QQMusicVideoIE,
|
||||
)
|
||||
from .r7 import (
|
||||
R7IE,
|
||||
@@ -1755,7 +1760,10 @@ from .rtve import (
|
||||
RTVETelevisionIE,
|
||||
)
|
||||
from .rtvs import RTVSIE
|
||||
from .rtvslo import RTVSLOIE
|
||||
from .rtvslo import (
|
||||
RTVSLOIE,
|
||||
RTVSLOShowIE,
|
||||
)
|
||||
from .rudovideo import RudoVideoIE
|
||||
from .rule34video import Rule34VideoIE
|
||||
from .rumble import (
|
||||
@@ -1925,6 +1933,10 @@ from .spreaker import (
|
||||
)
|
||||
from .springboardplatform import SpringboardPlatformIE
|
||||
from .sprout import SproutIE
|
||||
from .sproutvideo import (
|
||||
SproutVideoIE,
|
||||
VidsIoIE,
|
||||
)
|
||||
from .srgssr import (
|
||||
SRGSSRIE,
|
||||
SRGSSRPlayIE,
|
||||
@@ -2311,6 +2323,7 @@ from .vidio import (
|
||||
)
|
||||
from .vidlii import VidLiiIE
|
||||
from .vidly import VidlyIE
|
||||
from .vidyard import VidyardIE
|
||||
from .viewlift import (
|
||||
ViewLiftEmbedIE,
|
||||
ViewLiftIE,
|
||||
@@ -2376,6 +2389,10 @@ from .vrt import (
|
||||
VrtNUIE,
|
||||
)
|
||||
from .vtm import VTMIE
|
||||
from .vtv import (
|
||||
VTVIE,
|
||||
VTVGoIE,
|
||||
)
|
||||
from .vuclip import VuClipIE
|
||||
from .vvvvid import (
|
||||
VVVVIDIE,
|
||||
|
||||
@@ -4,7 +4,6 @@ import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
dict_get,
|
||||
@@ -67,7 +66,7 @@ class ABCIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'WWI Centenary',
|
||||
'description': 'md5:c2379ec0ca84072e86b446e536954546',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.abc.net.au/news/programs/the-world/2020-06-10/black-lives-matter-protests-spawn-support-for/12342074',
|
||||
'info_dict': {
|
||||
@@ -75,7 +74,7 @@ class ABCIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Black Lives Matter protests spawn support for Papuans in Indonesia',
|
||||
'description': 'md5:2961a17dc53abc558589ccd0fb8edd6f',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.abc.net.au/btn/newsbreak/btn-newsbreak-20200814/12560476',
|
||||
'info_dict': {
|
||||
@@ -86,7 +85,7 @@ class ABCIE(InfoExtractor):
|
||||
'upload_date': '20200813',
|
||||
'uploader': 'Behind the News',
|
||||
'uploader_id': 'behindthenews',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.abc.net.au/news/2023-06-25/wagner-boss-orders-troops-back-to-bases-to-avoid-bloodshed/102520540',
|
||||
'info_dict': {
|
||||
@@ -95,7 +94,7 @@ class ABCIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'description': 'Wagner troops leave Rostov-on-Don and\xa0Yevgeny Prigozhin will move to Belarus under a deal brokered by Belarusian President Alexander Lukashenko to end the mutiny.',
|
||||
'thumbnail': 'https://live-production.wcms.abc-cdn.net.au/0c170f5b57f0105c432f366c0e8e267b?impolicy=wcms_crop_resize&cropH=2813&cropW=5000&xPos=0&yPos=249&width=862&height=485',
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -126,7 +125,7 @@ class ABCIE(InfoExtractor):
|
||||
if mobj is None:
|
||||
expired = self._html_search_regex(r'(?s)class="expired-(?:video|audio)".+?<span>(.+?)</span>', webpage, 'expired', None)
|
||||
if expired:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, expired), expected=True)
|
||||
raise ExtractorError(f'{self.IE_NAME} said: {expired}', expected=True)
|
||||
raise ExtractorError('Unable to extract video urls')
|
||||
|
||||
urls_info = self._parse_json(
|
||||
@@ -164,7 +163,7 @@ class ABCIE(InfoExtractor):
|
||||
'height': height,
|
||||
'tbr': bitrate,
|
||||
'filesize': int_or_none(url_info.get('filesize')),
|
||||
'format_id': format_id
|
||||
'format_id': format_id,
|
||||
})
|
||||
|
||||
return {
|
||||
@@ -288,13 +287,12 @@ class ABCIViewIE(InfoExtractor):
|
||||
stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream'))
|
||||
|
||||
house_number = video_params.get('episodeHouseNumber') or video_id
|
||||
path = '/auth/hls/sign?ts={0}&hn={1}&d=android-tablet'.format(
|
||||
int(time.time()), house_number)
|
||||
path = f'/auth/hls/sign?ts={int(time.time())}&hn={house_number}&d=android-tablet'
|
||||
sig = hmac.new(
|
||||
b'android.content.res.Resources',
|
||||
path.encode('utf-8'), hashlib.sha256).hexdigest()
|
||||
path.encode(), hashlib.sha256).hexdigest()
|
||||
token = self._download_webpage(
|
||||
'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id)
|
||||
f'http://iview.abc.net.au{path}&sig={sig}', video_id)
|
||||
|
||||
def tokenize_url(url, token):
|
||||
return update_url_query(url, {
|
||||
@@ -303,7 +301,7 @@ class ABCIViewIE(InfoExtractor):
|
||||
|
||||
for sd in ('1080', '720', 'sd', 'sd-low'):
|
||||
sd_url = try_get(
|
||||
stream, lambda x: x['streams']['hls'][sd], compat_str)
|
||||
stream, lambda x: x['streams']['hls'][sd], str)
|
||||
if not sd_url:
|
||||
continue
|
||||
formats = self._extract_m3u8_formats(
|
||||
@@ -358,7 +356,7 @@ class ABCIViewShowSeriesIE(InfoExtractor):
|
||||
'description': 'md5:93119346c24a7c322d446d8eece430ff',
|
||||
'series': 'Upper Middle Bogan',
|
||||
'season': 'Series 1',
|
||||
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$'
|
||||
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$',
|
||||
},
|
||||
'playlist_count': 8,
|
||||
}, {
|
||||
@@ -386,7 +384,7 @@ class ABCIViewShowSeriesIE(InfoExtractor):
|
||||
'description': 'Satirist Mark Humphries brings his unique perspective on current political events for 7.30.',
|
||||
'series': '7.30 Mark Humphries Satire',
|
||||
'season': 'Episodes',
|
||||
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$'
|
||||
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$',
|
||||
},
|
||||
'playlist_count': 15,
|
||||
}]
|
||||
@@ -398,7 +396,7 @@ class ABCIViewShowSeriesIE(InfoExtractor):
|
||||
r'window\.__INITIAL_STATE__\s*=\s*[\'"](.+?)[\'"]\s*;',
|
||||
webpage, 'initial state')
|
||||
video_data = self._parse_json(
|
||||
unescapeHTML(webpage_data).encode('utf-8').decode('unicode_escape'), show_id)
|
||||
unescapeHTML(webpage_data).encode().decode('unicode_escape'), show_id)
|
||||
video_data = video_data['route']['pageData']['_embedded']
|
||||
|
||||
highlight = try_get(video_data, lambda x: x['highlightVideo']['shareUrl'])
|
||||
|
||||
@@ -58,7 +58,7 @@ class AbcNewsVideoIE(AMPIE):
|
||||
display_id = mobj.group('display_id')
|
||||
video_id = mobj.group('id')
|
||||
info_dict = self._extract_feed_info(
|
||||
'http://abcnews.go.com/video/itemfeed?id=%s' % video_id)
|
||||
f'http://abcnews.go.com/video/itemfeed?id={video_id}')
|
||||
info_dict.update({
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
int_or_none,
|
||||
@@ -57,11 +56,11 @@ class ABCOTVSIE(InfoExtractor):
|
||||
data = self._download_json(
|
||||
'https://api.abcotvs.com/v2/content', display_id, query={
|
||||
'id': video_id,
|
||||
'key': 'otv.web.%s.story' % station,
|
||||
'key': f'otv.web.{station}.story',
|
||||
'station': station,
|
||||
})['data']
|
||||
video = try_get(data, lambda x: x['featuredMedia']['video'], dict) or data
|
||||
video_id = compat_str(dict_get(video, ('id', 'publishedKey'), video_id))
|
||||
video_id = str(dict_get(video, ('id', 'publishedKey'), video_id))
|
||||
title = video.get('title') or video['linkText']
|
||||
|
||||
formats = []
|
||||
|
||||
@@ -9,12 +9,12 @@ import re
|
||||
import struct
|
||||
import time
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
import urllib.response
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_ecb_decrypt
|
||||
from ..networking import RequestHandler, Response
|
||||
from ..networking.exceptions import TransportError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
@@ -26,37 +26,36 @@ from ..utils import (
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
)
|
||||
from ..utils.networking import clean_proxies
|
||||
|
||||
|
||||
def add_opener(ydl, handler): # FIXME: Create proper API in .networking
|
||||
"""Add a handler for opening URLs, like _download_webpage"""
|
||||
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
|
||||
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
|
||||
rh = ydl._request_director.handlers['Urllib']
|
||||
if 'abematv-license' in rh._SUPPORTED_URL_SCHEMES:
|
||||
return
|
||||
headers = ydl.params['http_headers'].copy()
|
||||
proxies = ydl.proxies.copy()
|
||||
clean_proxies(proxies, headers)
|
||||
opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=proxies)
|
||||
assert isinstance(opener, urllib.request.OpenerDirector)
|
||||
opener.add_handler(handler)
|
||||
rh._SUPPORTED_URL_SCHEMES = (*rh._SUPPORTED_URL_SCHEMES, 'abematv-license')
|
||||
class AbemaLicenseRH(RequestHandler):
|
||||
_SUPPORTED_URL_SCHEMES = ('abematv-license',)
|
||||
_SUPPORTED_PROXY_SCHEMES = None
|
||||
_SUPPORTED_FEATURES = None
|
||||
RH_NAME = 'abematv_license'
|
||||
|
||||
_STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
|
||||
_HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
|
||||
|
||||
class AbemaLicenseHandler(urllib.request.BaseHandler):
|
||||
handler_order = 499
|
||||
STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
|
||||
HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
|
||||
|
||||
def __init__(self, ie: 'AbemaTVIE'):
|
||||
# the protocol that this should really handle is 'abematv-license://'
|
||||
# abematv_license_open is just a placeholder for development purposes
|
||||
# ref. https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510
|
||||
setattr(self, 'abematv-license_open', getattr(self, 'abematv_license_open', None))
|
||||
def __init__(self, *, ie: 'AbemaTVIE', **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
self.ie = ie
|
||||
|
||||
def _send(self, request):
|
||||
url = request.url
|
||||
ticket = urllib.parse.urlparse(url).netloc
|
||||
|
||||
try:
|
||||
response_data = self._get_videokey_from_ticket(ticket)
|
||||
except ExtractorError as e:
|
||||
raise TransportError(cause=e.cause) from e
|
||||
except (IndexError, KeyError, TypeError) as e:
|
||||
raise TransportError(cause=repr(e)) from e
|
||||
|
||||
return Response(
|
||||
io.BytesIO(response_data), url,
|
||||
headers={'Content-Length': str(len(response_data))})
|
||||
|
||||
def _get_videokey_from_ticket(self, ticket):
|
||||
to_show = self.ie.get_param('verbose', False)
|
||||
media_token = self.ie._get_media_token(to_show=to_show)
|
||||
@@ -66,31 +65,23 @@ class AbemaLicenseHandler(urllib.request.BaseHandler):
|
||||
query={'t': media_token},
|
||||
data=json.dumps({
|
||||
'kv': 'a',
|
||||
'lt': ticket
|
||||
}).encode('utf-8'),
|
||||
'lt': ticket,
|
||||
}).encode(),
|
||||
headers={
|
||||
'Content-Type': 'application/json',
|
||||
})
|
||||
|
||||
res = decode_base_n(license_response['k'], table=self.STRTABLE)
|
||||
res = decode_base_n(license_response['k'], table=self._STRTABLE)
|
||||
encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff))
|
||||
|
||||
h = hmac.new(
|
||||
binascii.unhexlify(self.HKEY),
|
||||
(license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8'),
|
||||
binascii.unhexlify(self._HKEY),
|
||||
(license_response['cid'] + self.ie._DEVICE_ID).encode(),
|
||||
digestmod=hashlib.sha256)
|
||||
enckey = bytes_to_intlist(h.digest())
|
||||
|
||||
return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))
|
||||
|
||||
def abematv_license_open(self, url):
|
||||
url = url.get_full_url() if isinstance(url, urllib.request.Request) else url
|
||||
ticket = urllib.parse.urlparse(url).netloc
|
||||
response_data = self._get_videokey_from_ticket(ticket)
|
||||
return urllib.response.addinfourl(io.BytesIO(response_data), headers={
|
||||
'Content-Length': str(len(response_data)),
|
||||
}, url=url, code=200)
|
||||
|
||||
|
||||
class AbemaTVBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'abematv'
|
||||
@@ -103,11 +94,11 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
|
||||
@classmethod
|
||||
def _generate_aks(cls, deviceid):
|
||||
deviceid = deviceid.encode('utf-8')
|
||||
deviceid = deviceid.encode()
|
||||
# add 1 hour and then drop minute and secs
|
||||
ts_1hour = int((time_seconds() // 3600 + 1) * 3600)
|
||||
time_struct = time.gmtime(ts_1hour)
|
||||
ts_1hour_str = str(ts_1hour).encode('utf-8')
|
||||
ts_1hour_str = str(ts_1hour).encode()
|
||||
|
||||
tmp = None
|
||||
|
||||
@@ -119,7 +110,7 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
|
||||
def mix_tmp(count):
|
||||
nonlocal tmp
|
||||
for i in range(count):
|
||||
for _ in range(count):
|
||||
mix_once(tmp)
|
||||
|
||||
def mix_twist(nonce):
|
||||
@@ -139,7 +130,7 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
if self._USERTOKEN:
|
||||
return self._USERTOKEN
|
||||
|
||||
add_opener(self._downloader, AbemaLicenseHandler(self))
|
||||
self._downloader._request_director.add_handler(AbemaLicenseRH(ie=self, logger=None))
|
||||
|
||||
username, _ = self._get_login_info()
|
||||
auth_cache = username and self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19')
|
||||
@@ -160,7 +151,7 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
data=json.dumps({
|
||||
'deviceId': self._DEVICE_ID,
|
||||
'applicationKeySecret': aks,
|
||||
}).encode('utf-8'),
|
||||
}).encode(),
|
||||
headers={
|
||||
'Content-Type': 'application/json',
|
||||
})
|
||||
@@ -180,7 +171,7 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
'osLang': 'ja_JP',
|
||||
'osTimezone': 'Asia/Tokyo',
|
||||
'appId': 'tv.abema',
|
||||
'appVersion': '3.27.1'
|
||||
'appVersion': '3.27.1',
|
||||
}, headers={
|
||||
'Authorization': f'bearer {self._get_device_token()}',
|
||||
})['token']
|
||||
@@ -202,8 +193,8 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
|
||||
data=json.dumps({
|
||||
method: username,
|
||||
'password': password
|
||||
}).encode('utf-8'), headers={
|
||||
'password': password,
|
||||
}).encode(), headers={
|
||||
'Authorization': f'bearer {self._get_device_token()}',
|
||||
'Origin': 'https://abema.tv',
|
||||
'Referer': 'https://abema.tv/',
|
||||
@@ -344,7 +335,7 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
|
||||
description = self._html_search_regex(
|
||||
(r'<p\s+class="com-video-EpisodeDetailsBlock__content"><span\s+class=".+?">(.+?)</span></p><div',
|
||||
r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div',),
|
||||
r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div'),
|
||||
webpage, 'description', default=None, group=1)
|
||||
if not description:
|
||||
og_desc = self._html_search_meta(
|
||||
@@ -368,6 +359,7 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
info['episode_number'] = epis if epis < 2000 else None
|
||||
|
||||
is_live, m3u8_url = False, None
|
||||
availability = 'public'
|
||||
if video_type == 'now-on-air':
|
||||
is_live = True
|
||||
channel_url = 'https://api.abema.io/v1/channels'
|
||||
@@ -389,6 +381,7 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
if 3 not in ondemand_types:
|
||||
# cannot acquire decryption key for these streams
|
||||
self.report_warning('This is a premium-only stream')
|
||||
availability = 'premium_only'
|
||||
info.update(traverse_obj(api_response, {
|
||||
'series': ('series', 'title'),
|
||||
'season': ('season', 'name'),
|
||||
@@ -408,6 +401,7 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
headers=headers)
|
||||
if not traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False):
|
||||
self.report_warning('This is a premium-only stream')
|
||||
availability = 'premium_only'
|
||||
|
||||
m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8'
|
||||
else:
|
||||
@@ -425,6 +419,7 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
'is_live': is_live,
|
||||
'availability': availability,
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
@@ -67,7 +67,7 @@ class ACastIE(ACastBaseIE):
|
||||
'display_id': '2.raggarmordet-rosterurdetforflutna',
|
||||
'season_number': 4,
|
||||
'season': 'Season 4',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015',
|
||||
'only_matching': True,
|
||||
@@ -93,13 +93,13 @@ class ACastIE(ACastBaseIE):
|
||||
'series': 'Democracy Sausage with Mark Kenny',
|
||||
'timestamp': 1684826362,
|
||||
'description': 'md5:feabe1fc5004c78ee59c84a46bf4ba16',
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel, display_id = self._match_valid_url(url).groups()
|
||||
episode = self._call_api(
|
||||
'%s/episodes/%s' % (channel, display_id),
|
||||
f'{channel}/episodes/{display_id}',
|
||||
display_id, {'showInfo': 'true'})
|
||||
return self._extract_episode(
|
||||
episode, self._extract_show_info(episode.get('show') or {}))
|
||||
@@ -130,7 +130,7 @@ class ACastChannelIE(ACastBaseIE):
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if ACastIE.suitable(url) else super(ACastChannelIE, cls).suitable(url)
|
||||
return False if ACastIE.suitable(url) else super().suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_slug = self._match_id(url)
|
||||
|
||||
@@ -25,7 +25,7 @@ class AcFunVideoBaseIE(InfoExtractor):
|
||||
'width': int_or_none(video.get('width')),
|
||||
'height': int_or_none(video.get('height')),
|
||||
'tbr': float_or_none(video.get('avgBitrate')),
|
||||
**parse_codecs(video.get('codecs', ''))
|
||||
**parse_codecs(video.get('codecs', '')),
|
||||
})
|
||||
|
||||
return {
|
||||
@@ -77,7 +77,7 @@ class AcFunVideoIE(AcFunVideoBaseIE):
|
||||
'comment_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg)',
|
||||
'description': 'md5:67583aaf3a0f933bd606bc8a2d3ebb17',
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -7,7 +7,6 @@ import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
|
||||
from ..compat import compat_b64decode
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
@@ -17,6 +16,7 @@ from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
join_nonempty,
|
||||
long_to_bytes,
|
||||
parse_iso8601,
|
||||
pkcs1pad,
|
||||
@@ -49,9 +49,9 @@ class ADNBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class ADNIE(ADNBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.(?P<lang>fr|de)/video/[^/?#]+/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.com/(?:(?P<lang>de)/)?video/[^/?#]+/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir',
|
||||
'url': 'https://animationdigitalnetwork.com/video/fruits-basket/9841-episode-1-a-ce-soir',
|
||||
'md5': '1c9ef066ceb302c86f80c2b371615261',
|
||||
'info_dict': {
|
||||
'id': '9841',
|
||||
@@ -71,10 +71,10 @@ class ADNIE(ADNBaseIE):
|
||||
},
|
||||
'skip': 'Only available in French and German speaking Europe',
|
||||
}, {
|
||||
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
|
||||
'url': 'http://animedigitalnetwork.com/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://animationdigitalnetwork.de/video/the-eminence-in-shadow/23550-folge-1',
|
||||
'url': 'https://animationdigitalnetwork.com/de/video/the-eminence-in-shadow/23550-folge-1',
|
||||
'md5': '5c5651bf5791fa6fcd7906012b9d94e8',
|
||||
'info_dict': {
|
||||
'id': '23550',
|
||||
@@ -111,9 +111,9 @@ class ADNIE(ADNBaseIE):
|
||||
|
||||
# http://animationdigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
||||
dec_subtitles = unpad_pkcs7(aes_cbc_decrypt_bytes(
|
||||
compat_b64decode(enc_subtitles[24:]),
|
||||
base64.b64decode(enc_subtitles[24:]),
|
||||
binascii.unhexlify(self._K + '7fac1178830cfe0c'),
|
||||
compat_b64decode(enc_subtitles[:24])))
|
||||
base64.b64decode(enc_subtitles[:24])))
|
||||
subtitles_json = self._parse_json(dec_subtitles.decode(), None, fatal=False)
|
||||
if not subtitles_json:
|
||||
return None
|
||||
@@ -136,7 +136,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
if start is None or end is None or text is None:
|
||||
continue
|
||||
alignment = self._POS_ALIGN_MAP.get(position_align, 2) + self._LINE_ALIGN_MAP.get(line_align, 0)
|
||||
ssa += os.linesep + 'Dialogue: Marked=0,%s,%s,Default,,0,0,0,,%s%s' % (
|
||||
ssa += os.linesep + 'Dialogue: Marked=0,{},{},Default,,0,0,0,,{}{}'.format(
|
||||
ass_subtitles_timecode(start),
|
||||
ass_subtitles_timecode(end),
|
||||
'{\\a%d}' % alignment if alignment != 2 else '',
|
||||
@@ -178,7 +178,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, video_id = self._match_valid_url(url).group('lang', 'id')
|
||||
video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id
|
||||
video_base_url = self._PLAYER_BASE_URL + f'video/{video_id}/'
|
||||
player = self._download_json(
|
||||
video_base_url + 'configuration', video_id,
|
||||
'Downloading player config JSON metadata',
|
||||
@@ -218,13 +218,13 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
links_data = self._download_json(
|
||||
links_url, video_id, 'Downloading links JSON metadata', headers={
|
||||
'X-Player-Token': authorization,
|
||||
'X-Target-Distribution': lang,
|
||||
**self._HEADERS
|
||||
'X-Target-Distribution': lang or 'fr',
|
||||
**self._HEADERS,
|
||||
}, query={
|
||||
'freeWithAds': 'true',
|
||||
'adaptive': 'false',
|
||||
'withMetadata': 'true',
|
||||
'source': 'Web'
|
||||
'source': 'Web',
|
||||
})
|
||||
break
|
||||
except ExtractorError as e:
|
||||
@@ -256,7 +256,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
for quality, load_balancer_url in qualities.items():
|
||||
load_balancer_data = self._download_json(
|
||||
load_balancer_url, video_id,
|
||||
'Downloading %s %s JSON metadata' % (format_id, quality),
|
||||
f'Downloading {format_id} {quality} JSON metadata',
|
||||
fatal=False) or {}
|
||||
m3u8_url = load_balancer_data.get('location')
|
||||
if not m3u8_url:
|
||||
@@ -276,7 +276,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
self.raise_login_required('This video requires a subscription', method='password')
|
||||
|
||||
video = (self._download_json(
|
||||
self._API_BASE_URL + 'video/%s' % video_id, video_id,
|
||||
self._API_BASE_URL + f'video/{video_id}', video_id,
|
||||
'Downloading additional video metadata', fatal=False) or {}).get('video') or {}
|
||||
show = video.get('show') or {}
|
||||
|
||||
@@ -299,9 +299,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
|
||||
|
||||
class ADNSeasonIE(ADNBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.(?P<lang>fr|de)/video/(?P<id>[^/?#]+)/?(?:$|[#?])'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.com/(?:(?P<lang>de)/)?video/(?P<id>[^/?#]+)/?(?:$|[#?])'
|
||||
_TESTS = [{
|
||||
'url': 'https://animationdigitalnetwork.fr/video/tokyo-mew-mew-new',
|
||||
'url': 'https://animationdigitalnetwork.com/video/tokyo-mew-mew-new',
|
||||
'playlist_count': 12,
|
||||
'info_dict': {
|
||||
'id': '911',
|
||||
@@ -319,8 +319,8 @@ class ADNSeasonIE(ADNBaseIE):
|
||||
episodes = self._download_json(
|
||||
f'{self._API_BASE_URL}video/show/{show_id}', video_show_slug,
|
||||
'Downloading episode list', headers={
|
||||
'X-Target-Distribution': lang,
|
||||
**self._HEADERS
|
||||
'X-Target-Distribution': lang or 'fr',
|
||||
**self._HEADERS,
|
||||
}, query={
|
||||
'order': 'asc',
|
||||
'limit': '-1',
|
||||
@@ -328,8 +328,8 @@ class ADNSeasonIE(ADNBaseIE):
|
||||
|
||||
def entries():
|
||||
for episode_id in traverse_obj(episodes, ('videos', ..., 'id', {str_or_none})):
|
||||
yield self.url_result(
|
||||
f'https://animationdigitalnetwork.{lang}/video/{video_show_slug}/{episode_id}',
|
||||
ADNIE, episode_id)
|
||||
yield self.url_result(join_nonempty(
|
||||
'https://animationdigitalnetwork.com', lang, 'video',
|
||||
video_show_slug, episode_id, delim='/'), ADNIE, episode_id)
|
||||
|
||||
return self.playlist_result(entries(), show_id, show.get('title'))
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
|
||||
class AdobeConnectIE(InfoExtractor):
|
||||
@@ -12,13 +10,13 @@ class AdobeConnectIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._html_extract_title(webpage)
|
||||
qs = compat_parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1])
|
||||
qs = urllib.parse.parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1])
|
||||
is_live = qs.get('isLive', ['false'])[0] == 'true'
|
||||
formats = []
|
||||
for con_string in qs['conStrings'][0].split(','):
|
||||
formats.append({
|
||||
'format_id': con_string.split('://')[0],
|
||||
'app': compat_urlparse.quote('?' + con_string.split('?')[1] + 'flvplayerapp/' + qs['appInstance'][0]),
|
||||
'app': urllib.parse.quote('?' + con_string.split('?')[1] + 'flvplayerapp/' + qs['appInstance'][0]),
|
||||
'ext': 'flv',
|
||||
'play_path': 'mp4:' + qs['streamName'][0],
|
||||
'rtmp_conn': 'S:' + qs['ticket'][0],
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -2,7 +2,6 @@ import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ISO639Utils,
|
||||
OnDemandPagedList,
|
||||
@@ -36,7 +35,7 @@ class AdobeTVBaseIE(InfoExtractor):
|
||||
return subtitles
|
||||
|
||||
def _parse_video_data(self, video_data):
|
||||
video_id = compat_str(video_data['id'])
|
||||
video_id = str(video_data['id'])
|
||||
title = video_data['title']
|
||||
|
||||
s3_extracted = False
|
||||
@@ -151,7 +150,7 @@ class AdobeTVPlaylistBaseIE(AdobeTVBaseIE):
|
||||
page += 1
|
||||
query['page'] = page
|
||||
for element_data in self._call_api(
|
||||
self._RESOURCE, display_id, query, 'Download Page %d' % page):
|
||||
self._RESOURCE, display_id, query, f'Download Page {page}'):
|
||||
yield self._process_data(element_data)
|
||||
|
||||
def _extract_playlist_entries(self, display_id, query):
|
||||
|
||||
@@ -91,7 +91,7 @@ class AdultSwimIE(TurnerBaseIE):
|
||||
getShowBySlug(slug:"%s") {
|
||||
%%s
|
||||
}
|
||||
}''' % show_path
|
||||
}''' % show_path # noqa: UP031
|
||||
if episode_path:
|
||||
query = query % '''title
|
||||
getVideoBySlug(slug:"%s") {
|
||||
@@ -128,7 +128,7 @@ class AdultSwimIE(TurnerBaseIE):
|
||||
episode_title = title = video_data['title']
|
||||
series = show_data.get('title')
|
||||
if series:
|
||||
title = '%s - %s' % (series, title)
|
||||
title = f'{series} - {title}'
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
@@ -191,7 +191,7 @@ class AdultSwimIE(TurnerBaseIE):
|
||||
if not slug:
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
'http://adultswim.com/videos/%s/%s' % (show_path, slug),
|
||||
f'http://adultswim.com/videos/{show_path}/{slug}',
|
||||
'AdultSwim', video.get('_id')))
|
||||
return self.playlist_result(
|
||||
entries, show_path, show_data.get('title'),
|
||||
|
||||
@@ -73,8 +73,8 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
def _extract_aetn_info(self, domain, filter_key, filter_value, url):
|
||||
requestor_id, brand = self._DOMAIN_MAP[domain]
|
||||
result = self._download_json(
|
||||
'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
|
||||
filter_value, query={'filter[%s]' % filter_key: filter_value})
|
||||
f'https://feeds.video.aetnd.com/api/v2/{brand}/videos',
|
||||
filter_value, query={f'filter[{filter_key}]': filter_value})
|
||||
result = traverse_obj(
|
||||
result, ('results',
|
||||
lambda k, v: k == 0 and v[filter_key] == filter_value),
|
||||
@@ -142,7 +142,7 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
'skip': 'Geo-restricted - This content is not available in your location.'
|
||||
'skip': 'Geo-restricted - This content is not available in your location.',
|
||||
}, {
|
||||
'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
|
||||
'info_dict': {
|
||||
@@ -171,28 +171,28 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
'skip': 'This video is only available for users of participating TV providers.',
|
||||
}, {
|
||||
'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://watch.lifetimemovieclub.com/movies/10-year-reunion/full-movie',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.history.com/videos/history-of-valentines-day',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.aetv.com/shows/duck-dynasty/videos/best-of-duck-dynasty-getting-quack-in-shape',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -209,14 +209,14 @@ class AENetworksListBaseIE(AENetworksBaseIE):
|
||||
%s(slug: "%s") {
|
||||
%s
|
||||
}
|
||||
}''' % (resource, slug, fields),
|
||||
}''' % (resource, slug, fields), # noqa: UP031
|
||||
}))['data'][resource]
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, slug = self._match_valid_url(url).groups()
|
||||
_, brand = self._DOMAIN_MAP[domain]
|
||||
playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS)
|
||||
base_url = 'http://watch.%s' % domain
|
||||
base_url = f'http://watch.{domain}'
|
||||
|
||||
entries = []
|
||||
for item in (playlist.get(self._ITEMS_KEY) or []):
|
||||
@@ -248,10 +248,10 @@ class AENetworksCollectionIE(AENetworksListBaseIE):
|
||||
'playlist_mincount': 12,
|
||||
}, {
|
||||
'url': 'https://watch.historyvault.com/shows/america-the-story-of-us-2/season-1/list/america-the-story-of-us',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.historyvault.com/collections/mysteryquest',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}]
|
||||
_RESOURCE = 'list'
|
||||
_ITEMS_KEY = 'items'
|
||||
@@ -309,7 +309,7 @@ class HistoryTopicIE(AENetworksBaseIE):
|
||||
'info_dict': {
|
||||
'id': '40700995724',
|
||||
'ext': 'mp4',
|
||||
'title': "History of Valentine’s Day",
|
||||
'title': 'History of Valentine’s Day',
|
||||
'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
|
||||
'timestamp': 1375819729,
|
||||
'upload_date': '20130806',
|
||||
@@ -364,6 +364,6 @@ class BiographyIE(AENetworksBaseIE):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
player_url = self._search_regex(
|
||||
r'<phoenix-iframe[^>]+src="(%s)' % HistoryPlayerIE._VALID_URL,
|
||||
rf'<phoenix-iframe[^>]+src="({HistoryPlayerIE._VALID_URL})',
|
||||
webpage, 'player URL')
|
||||
return self.url_result(player_url, HistoryPlayerIE.ie_key())
|
||||
|
||||
@@ -16,8 +16,8 @@ class AeonCoIE(InfoExtractor):
|
||||
'uploader': 'Semiconductor',
|
||||
'uploader_id': 'semiconductor',
|
||||
'uploader_url': 'https://vimeo.com/semiconductor',
|
||||
'duration': 348
|
||||
}
|
||||
'duration': 348,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://aeon.co/videos/dazzling-timelapse-shows-how-microbes-spoil-our-food-and-sometimes-enrich-it',
|
||||
'md5': '03582d795382e49f2fd0b427b55de409',
|
||||
@@ -29,8 +29,8 @@ class AeonCoIE(InfoExtractor):
|
||||
'uploader': 'Aeon Video',
|
||||
'uploader_id': 'aeonvideo',
|
||||
'uploader_url': 'https://vimeo.com/aeonvideo',
|
||||
'duration': 1344
|
||||
}
|
||||
'duration': 1344,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://aeon.co/videos/chew-over-the-prisoners-dilemma-and-see-if-you-can-find-the-rational-path-out',
|
||||
'md5': '1cfda0bf3ae24df17d00f2c0cb6cc21b',
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import Request
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
@@ -55,9 +56,16 @@ class AfreecaTVBaseIE(InfoExtractor):
|
||||
if result != 1:
|
||||
error = _ERRORS.get(result, 'You have failed to log in.')
|
||||
raise ExtractorError(
|
||||
'Unable to login: %s said: %s' % (self.IE_NAME, error),
|
||||
f'Unable to login: {self.IE_NAME} said: {error}',
|
||||
expected=True)
|
||||
|
||||
def _call_api(self, endpoint, display_id, data=None, headers=None, query=None):
|
||||
return self._download_json(Request(
|
||||
f'https://api.m.afreecatv.com/{endpoint}',
|
||||
data=data, headers=headers, query=query,
|
||||
extensions={'legacy_ssl': True}), display_id,
|
||||
'Downloading API JSON', 'Unable to download API JSON')
|
||||
|
||||
|
||||
class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'afreecatv'
|
||||
@@ -72,7 +80,7 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
)\?.*?\bnTitleNo=|
|
||||
vod\.afreecatv\.com/(PLAYER/STATION|player)/
|
||||
)
|
||||
(?P<id>\d+)
|
||||
(?P<id>\d+)/?(?:$|[?#&])
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
|
||||
@@ -184,9 +192,9 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
data = self._download_json(
|
||||
'https://api.m.afreecatv.com/station/video/a/view', video_id,
|
||||
headers={'Referer': url}, data=urlencode_postdata({
|
||||
data = self._call_api(
|
||||
'station/video/a/view', video_id, headers={'Referer': url},
|
||||
data=urlencode_postdata({
|
||||
'nTitleNo': video_id,
|
||||
'nApiLevel': 10,
|
||||
}))['data']
|
||||
@@ -227,7 +235,7 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
**traverse_obj(file_element, {
|
||||
'duration': ('duration', {functools.partial(int_or_none, scale=1000)}),
|
||||
'timestamp': ('file_start', {unified_timestamp}),
|
||||
})
|
||||
}),
|
||||
})
|
||||
|
||||
if traverse_obj(data, ('adult_status', {str})) == 'notLogin':
|
||||
@@ -253,6 +261,43 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
return self.playlist_result(entries, video_id, multi_video=True, **common_info)
|
||||
|
||||
|
||||
class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'afreecatv:catchstory'
|
||||
IE_DESC = 'afreecatv.com catch story'
|
||||
_VALID_URL = r'https?://vod\.afreecatv\.com/player/(?P<id>\d+)/catchstory'
|
||||
_TESTS = [{
|
||||
'url': 'https://vod.afreecatv.com/player/103247/catchstory',
|
||||
'info_dict': {
|
||||
'id': '103247',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
data = self._call_api(
|
||||
'catchstory/a/view', video_id, headers={'Referer': url},
|
||||
query={'aStoryListIdx': '', 'nStoryIdx': video_id})
|
||||
|
||||
return self.playlist_result(self._entries(data), video_id)
|
||||
|
||||
@staticmethod
|
||||
def _entries(data):
|
||||
# 'files' is always a list with 1 element
|
||||
yield from traverse_obj(data, (
|
||||
'data', lambda _, v: v['story_type'] == 'catch',
|
||||
'catch_list', lambda _, v: v['files'][0]['file'], {
|
||||
'id': ('files', 0, 'file_info_key', {str}),
|
||||
'url': ('files', 0, 'file', {url_or_none}),
|
||||
'duration': ('files', 0, 'duration', {functools.partial(int_or_none, scale=1000)}),
|
||||
'title': ('title', {str}),
|
||||
'uploader': ('writer_nick', {str}),
|
||||
'uploader_id': ('writer_id', {str}),
|
||||
'thumbnail': ('thumb', {url_or_none}),
|
||||
'timestamp': ('write_timestamp', {int_or_none}),
|
||||
}))
|
||||
|
||||
|
||||
class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'afreecatv:live'
|
||||
IE_DESC = 'afreecatv.com livestreams'
|
||||
|
||||
@@ -168,7 +168,7 @@ class TokFMPodcastIE(InfoExtractor):
|
||||
for ext in ('aac', 'mp3'):
|
||||
url_data = self._download_json(
|
||||
f'https://api.podcast.radioagora.pl/api4/getSongUrl?podcast_id={media_id}&device_id={uuid.uuid4()}&ppre=false&audio={ext}',
|
||||
media_id, 'Downloading podcast %s URL' % ext)
|
||||
media_id, f'Downloading podcast {ext} URL')
|
||||
# prevents inserting the mp3 (default) multiple times
|
||||
if 'link_ssl' in url_data and f'.{ext}' in url_data['link_ssl']:
|
||||
formats.append({
|
||||
@@ -206,8 +206,8 @@ class TokFMAuditionIE(InfoExtractor):
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _create_url(id):
|
||||
return f'https://audycje.tokfm.pl/audycja/{id}'
|
||||
def _create_url(video_id):
|
||||
return f'https://audycje.tokfm.pl/audycja/{video_id}'
|
||||
|
||||
def _real_extract(self, url):
|
||||
audition_id = self._match_id(url)
|
||||
|
||||
@@ -26,7 +26,7 @@ class AirTVIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'thumbnail': 'https://cdn-sp-gcs.air.tv/videos/W/8/W87jcWleSn2hXZN47zJZsQ/b13fc56464f47d9d62a36d110b9b5a72-4096x2160_9.jpg',
|
||||
'timestamp': 1664792603,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# with youtube_id
|
||||
'url': 'https://www.air.tv/watch?v=sv57EC8tRXG6h8dNXFUU1Q',
|
||||
@@ -54,7 +54,7 @@ class AirTVIE(InfoExtractor):
|
||||
'channel': 'Newsflare',
|
||||
'duration': 37,
|
||||
'upload_date': '20180511',
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _get_formats_and_subtitle(self, json_data, video_id):
|
||||
|
||||
@@ -22,7 +22,7 @@ class AitubeKZVideoIE(InfoExtractor):
|
||||
'timestamp': 1667370519,
|
||||
'title': 'Ангел хранитель 1 серия',
|
||||
'channel_follower_count': int,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# embed url
|
||||
'url': 'https://aitube.kz/embed/?id=9291d29b-c038-49a1-ad42-3da2051d353c',
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
try_get,
|
||||
@@ -44,7 +43,7 @@ class AliExpressLiveIE(InfoExtractor):
|
||||
'title': title,
|
||||
'thumbnail': data.get('coverUrl'),
|
||||
'uploader': try_get(
|
||||
data, lambda x: x['followBar']['name'], compat_str),
|
||||
data, lambda x: x['followBar']['name'], str),
|
||||
'timestamp': float_or_none(data.get('startTimeLong'), scale=1000),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -18,7 +18,7 @@ class AlJazeeraIE(InfoExtractor):
|
||||
'timestamp': 1636219149,
|
||||
'description': 'U sarajevskim naseljima Rajlovac i Reljevo stambeni objekti, ali i industrijska postrojenja i dalje su pod vodom.',
|
||||
'upload_date': '20211106',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://balkans.aljazeera.net/videos/2021/11/6/djokovic-usao-u-finale-mastersa-u-parizu',
|
||||
'info_dict': {
|
||||
@@ -33,7 +33,7 @@ class AlJazeeraIE(InfoExtractor):
|
||||
BRIGHTCOVE_URL_RE = r'https?://players.brightcove.net/(?P<account>\d+)/(?P<player_id>[a-zA-Z0-9]+)_(?P<embed>[^/]+)/index.html\?videoId=(?P<id>\d+)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
base, post_type, id = self._match_valid_url(url).groups()
|
||||
base, post_type, display_id = self._match_valid_url(url).groups()
|
||||
wp = {
|
||||
'balkans.aljazeera.net': 'ajb',
|
||||
'chinese.aljazeera.net': 'chinese',
|
||||
@@ -47,11 +47,11 @@ class AlJazeeraIE(InfoExtractor):
|
||||
'news': 'news',
|
||||
}[post_type.split('/')[0]]
|
||||
video = self._download_json(
|
||||
f'https://{base}/graphql', id, query={
|
||||
f'https://{base}/graphql', display_id, query={
|
||||
'wp-site': wp,
|
||||
'operationName': 'ArchipelagoSingleArticleQuery',
|
||||
'variables': json.dumps({
|
||||
'name': id,
|
||||
'name': display_id,
|
||||
'postType': post_type,
|
||||
}),
|
||||
}, headers={
|
||||
@@ -64,7 +64,7 @@ class AlJazeeraIE(InfoExtractor):
|
||||
embed = 'default'
|
||||
|
||||
if video_id is None:
|
||||
webpage = self._download_webpage(url, id)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
account, player_id, embed, video_id = self._search_regex(self.BRIGHTCOVE_URL_RE, webpage, 'video id',
|
||||
group=(1, 2, 3, 4), default=(None, None, None, None))
|
||||
@@ -73,11 +73,11 @@ class AlJazeeraIE(InfoExtractor):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': url,
|
||||
'ie_key': 'Generic'
|
||||
'ie_key': 'Generic',
|
||||
}
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': f'https://players.brightcove.net/{account}/{player_id}_{embed}/index.html?videoId={video_id}',
|
||||
'ie_key': 'BrightcoveNew'
|
||||
'ie_key': 'BrightcoveNew',
|
||||
}
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
qualities,
|
||||
@@ -95,11 +94,11 @@ class AllocineIE(InfoExtractor):
|
||||
duration = int_or_none(video.get('duration'))
|
||||
view_count = int_or_none(video.get('view_count'))
|
||||
timestamp = unified_timestamp(try_get(
|
||||
video, lambda x: x['added_at']['date'], compat_str))
|
||||
video, lambda x: x['added_at']['date'], str))
|
||||
else:
|
||||
video_id = display_id
|
||||
media_data = self._download_json(
|
||||
'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id)
|
||||
f'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media={video_id}', display_id)
|
||||
title = remove_end(strip_or_none(self._html_extract_title(webpage), ' - AlloCiné'))
|
||||
for key, value in media_data['video'].items():
|
||||
if not key.endswith('Path'):
|
||||
|
||||
@@ -33,27 +33,27 @@ _QUERIES = {
|
||||
video: getClip(clipIdentifier: $id) {
|
||||
%s %s
|
||||
}
|
||||
}''' % (_FIELDS, _EXTRA_FIELDS),
|
||||
}''' % (_FIELDS, _EXTRA_FIELDS), # noqa: UP031
|
||||
'montage': '''query ($id: String!) {
|
||||
video: getMontage(clipIdentifier: $id) {
|
||||
%s
|
||||
}
|
||||
}''' % _FIELDS,
|
||||
}''' % _FIELDS, # noqa: UP031
|
||||
'Clips': '''query ($page: Int!, $user: String!, $game: Int) {
|
||||
videos: clips(search: createdDate, page: $page, user: $user, mobile: false, game: $game) {
|
||||
data { %s %s }
|
||||
}
|
||||
}''' % (_FIELDS, _EXTRA_FIELDS),
|
||||
}''' % (_FIELDS, _EXTRA_FIELDS), # noqa: UP031
|
||||
'Montages': '''query ($page: Int!, $user: String!) {
|
||||
videos: montages(search: createdDate, page: $page, user: $user) {
|
||||
data { %s }
|
||||
}
|
||||
}''' % _FIELDS,
|
||||
}''' % _FIELDS, # noqa: UP031
|
||||
'Mobile Clips': '''query ($page: Int!, $user: String!) {
|
||||
videos: clips(search: createdDate, page: $page, user: $user, mobile: true) {
|
||||
data { %s %s }
|
||||
}
|
||||
}''' % (_FIELDS, _EXTRA_FIELDS),
|
||||
}''' % (_FIELDS, _EXTRA_FIELDS), # noqa: UP031
|
||||
}
|
||||
|
||||
|
||||
@@ -121,7 +121,7 @@ class AllstarIE(AllstarBaseIE):
|
||||
'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
|
||||
'upload_date': '20230425',
|
||||
'view_count': int,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://allstar.gg/clip?clip=8LJLY4JKB',
|
||||
'info_dict': {
|
||||
@@ -139,7 +139,7 @@ class AllstarIE(AllstarBaseIE):
|
||||
'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
|
||||
'upload_date': '20230702',
|
||||
'view_count': int,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://allstar.gg/montage?montage=643e64089da7e9363e1fa66c',
|
||||
'info_dict': {
|
||||
@@ -155,7 +155,7 @@ class AllstarIE(AllstarBaseIE):
|
||||
'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
|
||||
'upload_date': '20230418',
|
||||
'view_count': int,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://allstar.gg/montage?montage=RILJMH6QOS',
|
||||
'info_dict': {
|
||||
@@ -171,7 +171,7 @@ class AllstarIE(AllstarBaseIE):
|
||||
'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
|
||||
'upload_date': '20230703',
|
||||
'view_count': int,
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -191,28 +191,28 @@ class AllstarProfileIE(AllstarBaseIE):
|
||||
'id': '62b8bdfc9021052f7905882d-clips',
|
||||
'title': 'cherokee - Clips',
|
||||
},
|
||||
'playlist_mincount': 15
|
||||
'playlist_mincount': 15,
|
||||
}, {
|
||||
'url': 'https://allstar.gg/u/cherokee?game=730&view=Clips',
|
||||
'info_dict': {
|
||||
'id': '62b8bdfc9021052f7905882d-clips-730',
|
||||
'title': 'cherokee - Clips - 730',
|
||||
},
|
||||
'playlist_mincount': 15
|
||||
'playlist_mincount': 15,
|
||||
}, {
|
||||
'url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d?view=Montages',
|
||||
'info_dict': {
|
||||
'id': '62b8bdfc9021052f7905882d-montages',
|
||||
'title': 'cherokee - Montages',
|
||||
},
|
||||
'playlist_mincount': 4
|
||||
'playlist_mincount': 4,
|
||||
}, {
|
||||
'url': 'https://allstar.gg/profile?user=cherokee&view=Mobile Clips',
|
||||
'info_dict': {
|
||||
'id': '62b8bdfc9021052f7905882d-mobile',
|
||||
'title': 'cherokee - Mobile Clips',
|
||||
},
|
||||
'playlist_mincount': 1
|
||||
'playlist_mincount': 1,
|
||||
}]
|
||||
|
||||
_PAGE_SIZE = 10
|
||||
|
||||
@@ -25,7 +25,7 @@ class AlphaPornoIE(InfoExtractor):
|
||||
'tbr': 1145,
|
||||
'categories': list,
|
||||
'age_limit': 18,
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -12,7 +12,7 @@ from ..utils import (
|
||||
class Alsace20TVBaseIE(InfoExtractor):
|
||||
def _extract_video(self, video_id, url=None):
|
||||
info = self._download_json(
|
||||
'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key=%s&habillage=0&mode=html' % (video_id, ),
|
||||
f'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key={video_id}&habillage=0&mode=html',
|
||||
video_id) or {}
|
||||
title = info.get('titre')
|
||||
|
||||
@@ -24,9 +24,9 @@ class Alsace20TVBaseIE(InfoExtractor):
|
||||
else self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False))
|
||||
|
||||
webpage = (url and self._download_webpage(url, video_id, fatal=False)) or ''
|
||||
thumbnail = url_or_none(dict_get(info, ('image', 'preview', )) or self._og_search_thumbnail(webpage))
|
||||
thumbnail = url_or_none(dict_get(info, ('image', 'preview')) or self._og_search_thumbnail(webpage))
|
||||
upload_date = self._search_regex(r'/(\d{6})_', thumbnail, 'upload_date', default=None)
|
||||
upload_date = unified_strdate('20%s-%s-%s' % (upload_date[:2], upload_date[2:4], upload_date[4:])) if upload_date else None
|
||||
upload_date = unified_strdate(f'20{upload_date[:2]}-{upload_date[2:4]}-{upload_date[4:]}') if upload_date else None
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
|
||||
@@ -34,7 +34,7 @@ class AltCensoredIE(InfoExtractor):
|
||||
'thumbnail': 'https://archive.org/download/youtube-k0srjLSkga8/youtube-k0srjLSkga8.thumbs/k0srjLSkga8_000925.jpg',
|
||||
'view_count': int,
|
||||
'categories': ['News & Politics'],
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
@@ -21,7 +21,7 @@ class AluraIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '60095',
|
||||
'ext': 'mp4',
|
||||
'title': 'Referências, ref-set e alter'
|
||||
'title': 'Referências, ref-set e alter',
|
||||
},
|
||||
'skip': 'Requires alura account credentials'},
|
||||
{
|
||||
@@ -30,7 +30,7 @@ class AluraIE(InfoExtractor):
|
||||
'only_matching': True},
|
||||
{
|
||||
'url': 'https://cursos.alura.com.br/course/fundamentos-market-digital/task/55219',
|
||||
'only_matching': True}
|
||||
'only_matching': True},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -62,7 +62,7 @@ class AluraIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
"formats": formats
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
@@ -91,7 +91,7 @@ class AluraIE(InfoExtractor):
|
||||
'post url', default=self._LOGIN_URL, group='url')
|
||||
|
||||
if not post_url.startswith('http'):
|
||||
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
|
||||
post_url = urllib.parse.urljoin(self._LOGIN_URL, post_url)
|
||||
|
||||
response = self._download_webpage(
|
||||
post_url, None, 'Logging in',
|
||||
@@ -103,7 +103,7 @@ class AluraIE(InfoExtractor):
|
||||
r'(?s)<p[^>]+class="alert-message[^"]*">(.+?)</p>',
|
||||
response, 'error message', default=None)
|
||||
if error:
|
||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||
raise ExtractorError(f'Unable to login: {error}', expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
|
||||
@@ -119,7 +119,7 @@ class AluraCourseIE(AluraIE): # XXX: Do not subclass from concrete IE
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if AluraIE.suitable(url) else super(AluraCourseIE, cls).suitable(url)
|
||||
return False if AluraIE.suitable(url) else super().suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -157,7 +157,7 @@ class AluraCourseIE(AluraIE): # XXX: Do not subclass from concrete IE
|
||||
'url': video_url,
|
||||
'id_key': self.ie_key(),
|
||||
'chapter': chapter,
|
||||
'chapter_number': chapter_number
|
||||
'chapter_number': chapter_number,
|
||||
}
|
||||
entries.append(entry)
|
||||
return self.playlist_result(entries, course_path, course_title)
|
||||
|
||||
@@ -24,7 +24,7 @@ class AmadeusTVIE(InfoExtractor):
|
||||
'display_id': '65091a87ff85af59d9fc54c3',
|
||||
'view_count': int,
|
||||
'description': 'md5:a0357b9c215489e2067cbae0b777bb95',
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -25,7 +25,7 @@ class AmaraIE(InfoExtractor):
|
||||
'uploader': 'PBS NewsHour',
|
||||
'uploader_id': 'PBSNewsHour',
|
||||
'timestamp': 1549639570,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# Vimeo
|
||||
'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
|
||||
@@ -40,8 +40,8 @@ class AmaraIE(InfoExtractor):
|
||||
'timestamp': 1294763658,
|
||||
'upload_date': '20110111',
|
||||
'uploader': 'Sam Morrill',
|
||||
'uploader_id': 'sammorrill'
|
||||
}
|
||||
'uploader_id': 'sammorrill',
|
||||
},
|
||||
}, {
|
||||
# Direct Link
|
||||
'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
|
||||
@@ -55,13 +55,13 @@ class AmaraIE(InfoExtractor):
|
||||
'subtitles': dict,
|
||||
'upload_date': '20091007',
|
||||
'timestamp': 1254942511,
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
meta = self._download_json(
|
||||
'https://amara.org/api/videos/%s/' % video_id,
|
||||
f'https://amara.org/api/videos/{video_id}/',
|
||||
video_id, query={'format': 'json'})
|
||||
title = meta['title']
|
||||
video_url = meta['all_urls'][0]
|
||||
|
||||
@@ -61,13 +61,13 @@ class AmazonStoreIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = self._match_id(url)
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
for retry in self.RetryManager():
|
||||
webpage = self._download_webpage(url, id)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
try:
|
||||
data_json = self._search_json(
|
||||
r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', id,
|
||||
r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', playlist_id,
|
||||
transform_source=js_to_json)
|
||||
except ExtractorError as e:
|
||||
retry.error = e
|
||||
@@ -81,7 +81,7 @@ class AmazonStoreIE(InfoExtractor):
|
||||
'height': int_or_none(video.get('videoHeight')),
|
||||
'width': int_or_none(video.get('videoWidth')),
|
||||
} for video in (data_json.get('videos') or []) if video.get('isVideo') and video.get('url')]
|
||||
return self.playlist_result(entries, playlist_id=id, playlist_title=data_json.get('title'))
|
||||
return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=data_json.get('title'))
|
||||
|
||||
|
||||
class AmazonReviewsIE(InfoExtractor):
|
||||
|
||||
@@ -25,7 +25,7 @@ class AmazonMiniTVBaseIE(InfoExtractor):
|
||||
asin, note=note, headers={
|
||||
'Content-Type': 'application/json',
|
||||
'currentpageurl': '/',
|
||||
'currentplatform': 'dWeb'
|
||||
'currentplatform': 'dWeb',
|
||||
}, data=json.dumps(data).encode() if data else None,
|
||||
query=None if data else {
|
||||
'deviceType': 'A1WMMUXPCUJL4N',
|
||||
|
||||
@@ -64,8 +64,8 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
site, display_id = self._match_valid_url(url).groups()
|
||||
requestor_id = self._REQUESTOR_ID_MAP[site]
|
||||
page_data = self._download_json(
|
||||
'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/%s/url/%s'
|
||||
% (requestor_id.lower(), display_id), display_id)['data']
|
||||
f'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/{requestor_id.lower()}/url/{display_id}',
|
||||
display_id)['data']
|
||||
properties = page_data.get('properties') or {}
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
@@ -76,15 +76,15 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
try:
|
||||
for v in page_data['children']:
|
||||
if v.get('type') == 'video-player':
|
||||
releasePid = v['properties']['currentVideo']['meta']['releasePid']
|
||||
tp_path = 'M_UwQC/' + releasePid
|
||||
release_pid = v['properties']['currentVideo']['meta']['releasePid']
|
||||
tp_path = 'M_UwQC/' + release_pid
|
||||
media_url = 'https://link.theplatform.com/s/' + tp_path
|
||||
video_player_count += 1
|
||||
except KeyError:
|
||||
pass
|
||||
if video_player_count > 1:
|
||||
self.report_warning(
|
||||
'The JSON data has %d video players. Only one will be extracted' % video_player_count)
|
||||
f'The JSON data has {video_player_count} video players. Only one will be extracted')
|
||||
|
||||
# Fall back to videoPid if releasePid not found.
|
||||
# TODO: Fall back to videoPid if releasePid manifest uses DRM.
|
||||
@@ -131,7 +131,7 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
})
|
||||
ns_keys = theplatform_metadata.get('$xmlns', {}).keys()
|
||||
if ns_keys:
|
||||
ns = list(ns_keys)[0]
|
||||
ns = next(iter(ns_keys))
|
||||
episode = theplatform_metadata.get(ns + '$episodeTitle') or None
|
||||
episode_number = int_or_none(
|
||||
theplatform_metadata.get(ns + '$episode'))
|
||||
|
||||
@@ -87,13 +87,13 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
resource_type = 'episodes'
|
||||
|
||||
resource = self._download_json(
|
||||
'https://www.americastestkitchen.com/api/v6/%s/%s' % (resource_type, video_id), video_id)
|
||||
f'https://www.americastestkitchen.com/api/v6/{resource_type}/{video_id}', video_id)
|
||||
video = resource['video'] if is_episode else resource
|
||||
episode = resource if is_episode else resource.get('episode') or {}
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'],
|
||||
'url': 'https://player.zype.com/embed/{}.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ'.format(video['zypeId']),
|
||||
'ie_key': 'Zype',
|
||||
'description': clean_html(video.get('description')),
|
||||
'timestamp': unified_timestamp(video.get('publishDate')),
|
||||
@@ -174,22 +174,22 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
|
||||
]
|
||||
|
||||
if season_number:
|
||||
playlist_id = 'season_%d' % season_number
|
||||
playlist_title = 'Season %d' % season_number
|
||||
playlist_id = f'season_{season_number}'
|
||||
playlist_title = f'Season {season_number}'
|
||||
facet_filters.append('search_season_list:' + playlist_title)
|
||||
else:
|
||||
playlist_id = show
|
||||
playlist_title = title
|
||||
|
||||
season_search = self._download_json(
|
||||
'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
|
||||
f'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_{slug}_season_desc_production',
|
||||
playlist_id, headers={
|
||||
'Origin': 'https://www.americastestkitchen.com',
|
||||
'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
|
||||
'X-Algolia-Application-Id': 'Y1FNZXUI30',
|
||||
}, query={
|
||||
'facetFilters': json.dumps(facet_filters),
|
||||
'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title,search_atk_episode_season' % slug,
|
||||
'attributesToRetrieve': f'description,search_{slug}_episode_number,search_document_date,search_url,title,search_atk_episode_season',
|
||||
'attributesToHighlight': '',
|
||||
'hitsPerPage': 1000,
|
||||
})
|
||||
@@ -207,7 +207,7 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
|
||||
'description': episode.get('description'),
|
||||
'timestamp': unified_timestamp(episode.get('search_document_date')),
|
||||
'season_number': season_number,
|
||||
'episode_number': int_or_none(episode.get('search_%s_episode_number' % slug)),
|
||||
'episode_number': int_or_none(episode.get(f'search_{slug}_episode_number')),
|
||||
'ie_key': AmericasTestKitchenIE.ie_key(),
|
||||
}
|
||||
|
||||
|
||||
@@ -19,12 +19,12 @@ class AMPIE(InfoExtractor): # XXX: Conventionally, base classes should end with
|
||||
'Unable to download Akamai AMP feed', transform_source=strip_jsonp)
|
||||
item = feed.get('channel', {}).get('item')
|
||||
if not item:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, feed['error']))
|
||||
raise ExtractorError('{} said: {}'.format(self.IE_NAME, feed['error']))
|
||||
|
||||
video_id = item['guid']
|
||||
|
||||
def get_media_node(name, default=None):
|
||||
media_name = 'media-%s' % name
|
||||
media_name = f'media-{name}'
|
||||
media_group = item.get('media-group') or item
|
||||
return media_group.get(media_name) or item.get(media_name) or item.get(name, default)
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@ class AnchorFMEpisodeIE(InfoExtractor):
|
||||
'release_date': '20230121',
|
||||
'release_timestamp': 1674285179,
|
||||
'episode_id': 'e1tpt3d',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# embed url
|
||||
'url': 'https://anchor.fm/apakatatempo/embed/episodes/S2E75-Perang-Bintang-di-Balik-Kasus-Ferdy-Sambo-dan-Ismail-Bolong-e1shjqd',
|
||||
@@ -50,7 +50,7 @@ class AnchorFMEpisodeIE(InfoExtractor):
|
||||
'season': 'Season 2',
|
||||
'season_number': 2,
|
||||
'episode_id': 'e1shjqd',
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
_WEBPAGE_TESTS = [{
|
||||
@@ -72,7 +72,7 @@ class AnchorFMEpisodeIE(InfoExtractor):
|
||||
'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_episode400/2627805/2627805-1671590688729-4db3882ac9e4b.jpg',
|
||||
'uploader': 'Podcast Tempo',
|
||||
'channel': 'apakatatempo',
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -15,8 +15,8 @@ class AngelIE(InfoExtractor):
|
||||
'title': 'Tuttle Twins Season 1, Episode 1: When Laws Give You Lemons',
|
||||
'description': 'md5:73b704897c20ab59c433a9c0a8202d5e',
|
||||
'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$',
|
||||
'duration': 1359.0
|
||||
}
|
||||
'duration': 1359.0,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.angel.com/watch/the-chosen/episode/8dfb714d-bca5-4812-8125-24fb9514cd10/season-1/episode-1/i-have-called-you-by-name',
|
||||
'md5': 'e4774bad0a5f0ad2e90d175cafdb797d',
|
||||
@@ -26,8 +26,8 @@ class AngelIE(InfoExtractor):
|
||||
'title': 'The Chosen Season 1, Episode 1: I Have Called You By Name',
|
||||
'description': 'md5:aadfb4827a94415de5ff6426e6dee3be',
|
||||
'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$',
|
||||
'duration': 3276.0
|
||||
}
|
||||
'duration': 3276.0,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -44,7 +44,7 @@ class AngelIE(InfoExtractor):
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
# Angel uses cloudinary in the background and supports image transformations.
|
||||
|
||||
@@ -105,7 +105,7 @@ class Ant1NewsGrArticleIE(AntennaBaseIE):
|
||||
info = self._search_json_ld(webpage, video_id, expected_type='NewsArticle')
|
||||
embed_urls = list(Ant1NewsGrEmbedIE._extract_embed_urls(url, webpage))
|
||||
if not embed_urls:
|
||||
raise ExtractorError('no videos found for %s' % video_id, expected=True)
|
||||
raise ExtractorError(f'no videos found for {video_id}', expected=True)
|
||||
return self.playlist_from_matches(
|
||||
embed_urls, video_id, info.get('title'), ie=Ant1NewsGrEmbedIE.ie_key(),
|
||||
video_kwargs={'url_transparent': True, 'timestamp': info.get('timestamp')})
|
||||
|
||||
@@ -238,7 +238,7 @@ class AnvatoIE(InfoExtractor):
|
||||
'gray': 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900',
|
||||
'hearst': 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99',
|
||||
'cbs': 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe',
|
||||
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
|
||||
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582',
|
||||
}
|
||||
|
||||
def _generate_nfl_token(self, anvack, mcp_id):
|
||||
@@ -255,7 +255,7 @@ class AnvatoIE(InfoExtractor):
|
||||
token
|
||||
}
|
||||
}
|
||||
}''' % (anvack, mcp_id),
|
||||
}''' % (anvack, mcp_id), # noqa: UP031
|
||||
}).encode(), headers={
|
||||
'Authorization': auth_token,
|
||||
'Content-Type': 'application/json',
|
||||
@@ -299,7 +299,7 @@ class AnvatoIE(InfoExtractor):
|
||||
|
||||
return self._download_json(
|
||||
video_data_url, video_id, transform_source=strip_jsonp, query=query,
|
||||
data=json.dumps({'api': api}, separators=(',', ':')).encode('utf-8'))
|
||||
data=json.dumps({'api': api}, separators=(',', ':')).encode())
|
||||
|
||||
def _get_anvato_videos(self, access_key, video_id, token):
|
||||
video_data = self._get_video_json(access_key, video_id, token)
|
||||
@@ -358,7 +358,7 @@ class AnvatoIE(InfoExtractor):
|
||||
for caption in video_data.get('captions', []):
|
||||
a_caption = {
|
||||
'url': caption['url'],
|
||||
'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None
|
||||
'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None,
|
||||
}
|
||||
subtitles.setdefault(caption['language'], []).append(a_caption)
|
||||
subtitles = self._merge_subtitles(subtitles, hls_subs, vtt_subs)
|
||||
|
||||
@@ -30,7 +30,7 @@ class AolIE(YahooIE): # XXX: Do not subclass from concrete IE
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# video with vidible ID
|
||||
'url': 'https://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/',
|
||||
@@ -46,7 +46,7 @@ class AolIE(YahooIE): # XXX: Do not subclass from concrete IE
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.aol.com/video/view/park-bench-season-2-trailer/559a1b9be4b0c3bfad3357a7/',
|
||||
'only_matching': True,
|
||||
@@ -83,10 +83,10 @@ class AolIE(YahooIE): # XXX: Do not subclass from concrete IE
|
||||
return self._extract_yahoo_video(video_id, 'us')
|
||||
|
||||
response = self._download_json(
|
||||
'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id,
|
||||
f'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/{video_id}/details',
|
||||
video_id)['response']
|
||||
if response['statusText'] != 'Ok':
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, response['statusText']), expected=True)
|
||||
raise ExtractorError('{} said: {}'.format(self.IE_NAME, response['statusText']), expected=True)
|
||||
|
||||
video_data = response['data']
|
||||
formats = []
|
||||
|
||||
@@ -34,7 +34,7 @@ class APAIE(InfoExtractor):
|
||||
video_id, base_url = mobj.group('id', 'base_url')
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'%s/player/%s' % (base_url, video_id), video_id)
|
||||
f'{base_url}/player/{video_id}', video_id)
|
||||
|
||||
jwplatform_id = self._search_regex(
|
||||
r'media[iI]d\s*:\s*["\'](?P<id>[a-zA-Z0-9]{8})', webpage,
|
||||
@@ -47,7 +47,7 @@ class APAIE(InfoExtractor):
|
||||
|
||||
def extract(field, name=None):
|
||||
return self._search_regex(
|
||||
r'\b%s["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % field,
|
||||
rf'\b{field}["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
|
||||
webpage, name or field, default=None, group='value')
|
||||
|
||||
title = extract('title') or video_id
|
||||
|
||||
@@ -24,7 +24,7 @@ class ApplePodcastsIE(InfoExtractor):
|
||||
'duration': 6454,
|
||||
'series': 'The Tim Dillon Show',
|
||||
'thumbnail': 're:.+[.](png|jpe?g|webp)',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
|
||||
'only_matching': True,
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import json
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
@@ -64,7 +64,7 @@ class AppleTrailersIE(InfoExtractor):
|
||||
'uploader_id': 'wb',
|
||||
},
|
||||
},
|
||||
]
|
||||
],
|
||||
}, {
|
||||
'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/',
|
||||
'info_dict': {
|
||||
@@ -99,7 +99,7 @@ class AppleTrailersIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, movie)
|
||||
film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id')
|
||||
film_data = self._download_json(
|
||||
'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id,
|
||||
f'http://trailers.apple.com/trailers/feeds/data/{film_id}.json',
|
||||
film_id, fatal=False)
|
||||
|
||||
if film_data:
|
||||
@@ -114,7 +114,7 @@ class AppleTrailersIE(InfoExtractor):
|
||||
if not src:
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': '%s-%s' % (version, size),
|
||||
'format_id': f'{version}-{size}',
|
||||
'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src),
|
||||
'width': int_or_none(size_data.get('width')),
|
||||
'height': int_or_none(size_data.get('height')),
|
||||
@@ -134,7 +134,7 @@ class AppleTrailersIE(InfoExtractor):
|
||||
page_data = film_data.get('page', {})
|
||||
return self.playlist_result(entries, film_id, page_data.get('movie_title'))
|
||||
|
||||
playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
|
||||
playlist_url = urllib.parse.urljoin(url, 'includes/playlists/itunes.inc')
|
||||
|
||||
def fix_html(s):
|
||||
s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
|
||||
@@ -143,10 +143,9 @@ class AppleTrailersIE(InfoExtractor):
|
||||
# like: http://trailers.apple.com/trailers/wb/gravity/
|
||||
|
||||
def _clean_json(m):
|
||||
return 'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
|
||||
return 'iTunes.playURL({});'.format(m.group(1).replace('\'', '&#39;'))
|
||||
s = re.sub(self._JSON_RE, _clean_json, s)
|
||||
s = '<html>%s</html>' % s
|
||||
return s
|
||||
return f'<html>{s}</html>'
|
||||
doc = self._download_xml(playlist_url, movie, transform_source=fix_html)
|
||||
|
||||
playlist = []
|
||||
@@ -170,18 +169,18 @@ class AppleTrailersIE(InfoExtractor):
|
||||
duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
|
||||
|
||||
trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
|
||||
settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)
|
||||
settings_json_url = urllib.parse.urljoin(url, f'includes/settings/{trailer_id}.json')
|
||||
settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json')
|
||||
|
||||
formats = []
|
||||
for format in settings['metadata']['sizes']:
|
||||
for fmt in settings['metadata']['sizes']:
|
||||
# The src is a file pointing to the real video file
|
||||
format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src'])
|
||||
format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', fmt['src'])
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format': format['type'],
|
||||
'width': int_or_none(format['width']),
|
||||
'height': int_or_none(format['height']),
|
||||
'format': fmt['type'],
|
||||
'width': int_or_none(fmt['width']),
|
||||
'height': int_or_none(fmt['height']),
|
||||
})
|
||||
|
||||
playlist.append({
|
||||
@@ -229,7 +228,7 @@ class AppleTrailersSectionIE(InfoExtractor):
|
||||
'title': 'Movie Studios',
|
||||
},
|
||||
}
|
||||
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>%s)' % '|'.join(_SECTIONS)
|
||||
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>{})'.format('|'.join(_SECTIONS))
|
||||
_TESTS = [{
|
||||
'url': 'http://trailers.apple.com/#section=justadded',
|
||||
'info_dict': {
|
||||
@@ -270,7 +269,7 @@ class AppleTrailersSectionIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
section = self._match_id(url)
|
||||
section_data = self._download_json(
|
||||
'http://trailers.apple.com/trailers/home/feeds/%s.json' % self._SECTIONS[section]['feed_path'],
|
||||
'http://trailers.apple.com/trailers/home/feeds/{}.json'.format(self._SECTIONS[section]['feed_path']),
|
||||
section)
|
||||
entries = [
|
||||
self.url_result('http://trailers.apple.com' + e['location'])
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeBaseInfoExtractor, YoutubeIE
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..networking import HEADRequest
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
@@ -145,7 +146,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'title': 'Bells Of Rostov',
|
||||
'ext': 'mp3',
|
||||
},
|
||||
'skip': 'restricted'
|
||||
'skip': 'restricted',
|
||||
}, {
|
||||
'url': 'https://archive.org/details/lp_the-music-of-russia_various-artists-a-askaryan-alexander-melik/disc1/02.02.+Song+And+Chorus+In+The+Polovetsian+Camp+From+%22Prince+Igor%22+(Act+2%2C+Scene+1).mp3',
|
||||
'md5': '1d0aabe03edca83ca58d9ed3b493a3c3',
|
||||
@@ -158,7 +159,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'description': 'md5:012b2d668ae753be36896f343d12a236',
|
||||
'upload_date': '20190928',
|
||||
},
|
||||
'skip': 'restricted'
|
||||
'skip': 'restricted',
|
||||
}, {
|
||||
# Original formats are private
|
||||
'url': 'https://archive.org/details/irelandthemakingofarepublic',
|
||||
@@ -202,8 +203,8 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel2_001554.jpg',
|
||||
'display_id': 'irelandthemakingofarepublicreel2.mov',
|
||||
},
|
||||
}
|
||||
]
|
||||
},
|
||||
],
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -220,7 +221,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = urllib.parse.unquote_plus(self._match_id(url))
|
||||
identifier, entry_id = (video_id.split('/', 1) + [None])[:2]
|
||||
identifier, _, entry_id = video_id.partition('/')
|
||||
|
||||
# Archive.org metadata API doesn't clearly demarcate playlist entries
|
||||
# or subtitle tracks, so we get them from the embeddable player.
|
||||
@@ -246,7 +247,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
if track['kind'] != 'subtitles':
|
||||
continue
|
||||
entries[p['orig']][track['label']] = {
|
||||
'url': 'https://archive.org/' + track['file'].lstrip('/')
|
||||
'url': 'https://archive.org/' + track['file'].lstrip('/'),
|
||||
}
|
||||
|
||||
metadata = self._download_json('http://archive.org/metadata/' + identifier, identifier)
|
||||
@@ -293,7 +294,9 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'height': int_or_none(f.get('width')),
|
||||
'filesize': int_or_none(f.get('size'))})
|
||||
|
||||
extension = (f['name'].rsplit('.', 1) + [None])[1]
|
||||
_, has_ext, extension = f['name'].rpartition('.')
|
||||
if not has_ext:
|
||||
extension = None
|
||||
|
||||
# We don't want to skip private formats if the user has access to them,
|
||||
# however without access to an account with such privileges we can't implement/test this.
|
||||
@@ -308,7 +311,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'filesize': int_or_none(f.get('size')),
|
||||
'protocol': 'https',
|
||||
'source_preference': 0 if f.get('source') == 'original' else -1,
|
||||
'format_note': f.get('source')
|
||||
'format_note': f.get('source'),
|
||||
})
|
||||
|
||||
for entry in entries.values():
|
||||
@@ -371,7 +374,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'uploader_url': 'https://www.youtube.com/user/Zeurel',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCukCyHaD-bK3in_pKpfH9Eg',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# Internal link
|
||||
'url': 'https://web.archive.org/web/2oe/http://wayback-fakeurl.archive.org/yt/97t7Xj_iBv0',
|
||||
@@ -388,7 +391,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'uploader_url': 'https://www.youtube.com/user/1veritasium',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCHnyfMqiRRG1u-2MsSQLbXA',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# Video from 2012, webm format itag 45. Newest capture is deleted video, with an invalid description.
|
||||
# Should use the date in the link. Title ends with '- Youtube'. Capture has description in eow-description
|
||||
@@ -403,8 +406,8 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'uploader_id': 'machinima',
|
||||
'uploader_url': 'https://www.youtube.com/user/machinima',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'uploader': 'machinima'
|
||||
}
|
||||
'uploader': 'machinima',
|
||||
},
|
||||
}, {
|
||||
# FLV video. Video file URL does not provide itag information
|
||||
'url': 'https://web.archive.org/web/20081211103536/http://www.youtube.com/watch?v=jNQXAC9IVRw',
|
||||
@@ -421,7 +424,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'channel_url': 'https://www.youtube.com/channel/UC4QobU6STFB0P71PMvOGN5A',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'uploader': 'jawed',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://web.archive.org/web/20110712231407/http://www.youtube.com/watch?v=lTx3G6h2xyA',
|
||||
'info_dict': {
|
||||
@@ -437,7 +440,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'uploader_url': 'https://www.youtube.com/user/itsmadeon',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCqMDNf3Pn5L7pcNkuSEeO3w',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# First capture is of dead video, second is the oldest from CDX response.
|
||||
'url': 'https://web.archive.org/https://www.youtube.com/watch?v=1JYutPM8O6E',
|
||||
@@ -454,7 +457,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'channel_url': 'https://www.youtube.com/channel/UCdIaNUarhzLSXGoItz7BHVA',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'uploader': 'ETC News',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# First capture of dead video, capture date in link links to dead capture.
|
||||
'url': 'https://web.archive.org/web/20180803221945/https://www.youtube.com/watch?v=6FPhZJGvf4E',
|
||||
@@ -473,15 +476,15 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'uploader': 'ETC News',
|
||||
},
|
||||
'expected_warnings': [
|
||||
r'unable to download capture webpage \(it may not be archived\)'
|
||||
]
|
||||
r'unable to download capture webpage \(it may not be archived\)',
|
||||
],
|
||||
}, { # Very old YouTube page, has - YouTube in title.
|
||||
'url': 'http://web.archive.org/web/20070302011044/http://youtube.com/watch?v=-06-KB9XTzg',
|
||||
'info_dict': {
|
||||
'id': '-06-KB9XTzg',
|
||||
'ext': 'flv',
|
||||
'title': 'New Coin Hack!! 100% Safe!!'
|
||||
}
|
||||
'title': 'New Coin Hack!! 100% Safe!!',
|
||||
},
|
||||
}, {
|
||||
'url': 'web.archive.org/https://www.youtube.com/watch?v=dWW7qP423y8',
|
||||
'info_dict': {
|
||||
@@ -495,7 +498,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'description': 'md5:7b567f898d8237b256f36c1a07d6d7bc',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'uploader': 'DankPods',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# player response contains '};' See: https://github.com/ytdl-org/youtube-dl/issues/27093
|
||||
'url': 'https://web.archive.org/web/20200827003909if_/http://www.youtube.com/watch?v=6Dh-RL__uN4',
|
||||
@@ -512,7 +515,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'uploader_id': 'PewDiePie',
|
||||
'uploader_url': 'https://www.youtube.com/user/PewDiePie',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# ~June 2010 Capture. swfconfig
|
||||
'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=8XeW5ilk-9Y',
|
||||
@@ -527,7 +530,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'uploader_url': 'https://www.youtube.com/user/HowTheWorldWorks',
|
||||
'upload_date': '20090520',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# Jan 2011: watch-video-date/eow-date surrounded by whitespace
|
||||
'url': 'https://web.archive.org/web/20110126141719/http://www.youtube.com/watch?v=Q_yjX80U7Yc',
|
||||
@@ -542,7 +545,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'duration': 132,
|
||||
'uploader_url': 'https://www.youtube.com/user/claybutlermusic',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# ~May 2009 swfArgs. ytcfg is spread out over various vars
|
||||
'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=c5uJgG05xUY',
|
||||
@@ -557,7 +560,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'description': 'md5:4ca77d79538064e41e4cc464e93f44f0',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'duration': 754,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# ~June 2012. Upload date is in another lang so cannot extract.
|
||||
'url': 'https://web.archive.org/web/20120607174520/http://www.youtube.com/watch?v=xWTLLl-dQaA',
|
||||
@@ -571,7 +574,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'uploader': 'BlackNerdComedy',
|
||||
'duration': 182,
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# ~July 2013
|
||||
'url': 'https://web.archive.org/web/*/https://www.youtube.com/watch?v=9eO1aasHyTM',
|
||||
@@ -587,7 +590,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'channel_url': 'https://www.youtube.com/channel/UC62R2cBezNBOqxSerfb1nMQ',
|
||||
'upload_date': '20060428',
|
||||
'uploader': 'punkybird',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# April 2020: Player response in player config
|
||||
'url': 'https://web.archive.org/web/20200416034815/https://www.youtube.com/watch?v=Cf7vS8jc7dY&gl=US&hl=en',
|
||||
@@ -604,7 +607,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'description': 'md5:c625bb3c02c4f5fb4205971e468fa341',
|
||||
'uploader_url': 'https://www.youtube.com/user/GameGrumps',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# watch7-user-header with yt-user-info
|
||||
'url': 'ytarchive:kbh4T_b4Ixw:20160307085057',
|
||||
@@ -619,7 +622,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'upload_date': '20150503',
|
||||
'channel_id': 'UCnTaGvsHmMy792DWeT6HbGA',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# April 2012
|
||||
'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=SOm7mPoPskU',
|
||||
@@ -634,35 +637,35 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'duration': 200,
|
||||
'upload_date': '20120407',
|
||||
'uploader_id': 'thecomputernerd01',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://web.archive.org/web/http://www.youtube.com/watch?v=kH-G_aIBlFw',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://web.archive.org/web/20050214000000_if/http://www.youtube.com/watch?v=0altSZ96U4M',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Video not archived, only capture is unavailable video page
|
||||
'url': 'https://web.archive.org/web/20210530071008/https://www.youtube.com/watch?v=lHJTf93HL1s&spfreload=10',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, { # Encoded url
|
||||
'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fgl%3DUS%26v%3DAkhihxRKcrs%26hl%3Den',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fv%3DAkhihxRKcrs%26gl%3DUS%26hl%3Den',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://web.archive.org/web/20060527081937/http://www.youtube.com:80/watch.php?v=ELTFsLT73fA&search=soccer',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://web.archive.org/http://www.youtube.com:80/watch?v=-05VVye-ffg',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'ytarchive:BaW_jenozKc:20050214000000',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'ytarchive:BaW_jenozKc',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
_YT_INITIAL_DATA_RE = YoutubeBaseInfoExtractor._YT_INITIAL_DATA_RE
|
||||
@@ -673,13 +676,13 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
|
||||
_YT_DEFAULT_THUMB_SERVERS = ['i.ytimg.com'] # thumbnails most likely archived on these servers
|
||||
_YT_ALL_THUMB_SERVERS = orderedSet(
|
||||
_YT_DEFAULT_THUMB_SERVERS + ['img.youtube.com', *[f'{c}{n or ""}.ytimg.com' for c in ('i', 's') for n in (*range(0, 5), 9)]])
|
||||
[*_YT_DEFAULT_THUMB_SERVERS, 'img.youtube.com', *[f'{c}{n or ""}.ytimg.com' for c in ('i', 's') for n in (*range(5), 9)]])
|
||||
|
||||
_WAYBACK_BASE_URL = 'https://web.archive.org/web/%sif_/'
|
||||
_OLDEST_CAPTURE_DATE = 20050214000000
|
||||
_NEWEST_CAPTURE_DATE = 20500101000000
|
||||
|
||||
def _call_cdx_api(self, item_id, url, filters: list = None, collapse: list = None, query: dict = None, note=None, fatal=False):
|
||||
def _call_cdx_api(self, item_id, url, filters: list | None = None, collapse: list | None = None, query: dict | None = None, note=None, fatal=False):
|
||||
# CDX docs: https://github.com/internetarchive/wayback/blob/master/wayback-cdx-server/README.md
|
||||
query = {
|
||||
'url': url,
|
||||
@@ -688,14 +691,14 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'limit': 500,
|
||||
'filter': ['statuscode:200'] + (filters or []),
|
||||
'collapse': collapse or [],
|
||||
**(query or {})
|
||||
**(query or {}),
|
||||
}
|
||||
res = self._download_json(
|
||||
'https://web.archive.org/cdx/search/cdx', item_id,
|
||||
note or 'Downloading CDX API JSON', query=query, fatal=fatal)
|
||||
if isinstance(res, list) and len(res) >= 2:
|
||||
# format response to make it easier to use
|
||||
return list(dict(zip(res[0], v)) for v in res[1:])
|
||||
return [dict(zip(res[0], v)) for v in res[1:]]
|
||||
elif not isinstance(res, list) or len(res) != 0:
|
||||
self.report_warning('Error while parsing CDX API response' + bug_reports_message())
|
||||
|
||||
@@ -852,7 +855,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
{
|
||||
'url': (self._WAYBACK_BASE_URL % (int_or_none(thumbnail_dict.get('timestamp')) or self._OLDEST_CAPTURE_DATE)) + thumbnail_dict.get('original'),
|
||||
'filesize': int_or_none(thumbnail_dict.get('length')),
|
||||
'preference': int_or_none(thumbnail_dict.get('length'))
|
||||
'preference': int_or_none(thumbnail_dict.get('length')),
|
||||
} for thumbnail_dict in response)
|
||||
if not try_all:
|
||||
break
|
||||
@@ -893,7 +896,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
for retry in retry_manager:
|
||||
try:
|
||||
urlh = self._request_webpage(
|
||||
HEADRequest('https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s' % video_id),
|
||||
HEADRequest(f'https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/{video_id}'),
|
||||
video_id, note='Fetching archived video file url', expected_status=True)
|
||||
except ExtractorError as e:
|
||||
# HTTP Error 404 is expected if the video is not saved.
|
||||
@@ -924,21 +927,21 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
info['thumbnails'] = self._extract_thumbnails(video_id)
|
||||
|
||||
if urlh:
|
||||
url = compat_urllib_parse_unquote(urlh.url)
|
||||
url = urllib.parse.unquote(urlh.url)
|
||||
video_file_url_qs = parse_qs(url)
|
||||
# Attempt to recover any ext & format info from playback url & response headers
|
||||
format = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))}
|
||||
fmt = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))}
|
||||
itag = try_get(video_file_url_qs, lambda x: x['itag'][0])
|
||||
if itag and itag in YoutubeIE._formats:
|
||||
format.update(YoutubeIE._formats[itag])
|
||||
format.update({'format_id': itag})
|
||||
fmt.update(YoutubeIE._formats[itag])
|
||||
fmt.update({'format_id': itag})
|
||||
else:
|
||||
mime = try_get(video_file_url_qs, lambda x: x['mime'][0])
|
||||
ext = (mimetype2ext(mime)
|
||||
or urlhandle_detect_ext(urlh)
|
||||
or mimetype2ext(urlh.headers.get('x-archive-guessed-content-type')))
|
||||
format.update({'ext': ext})
|
||||
info['formats'] = [format]
|
||||
fmt.update({'ext': ext})
|
||||
info['formats'] = [fmt]
|
||||
if not info.get('duration'):
|
||||
info['duration'] = str_to_int(try_get(video_file_url_qs, lambda x: x['dur'][0]))
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
@@ -11,7 +12,7 @@ from ..utils import (
|
||||
|
||||
class ArcPublishingIE(InfoExtractor):
|
||||
_UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
|
||||
_VALID_URL = r'arcpublishing:(?P<org>[a-z]+):(?P<id>%s)' % _UUID_REGEX
|
||||
_VALID_URL = rf'arcpublishing:(?P<org>[a-z]+):(?P<id>{_UUID_REGEX})'
|
||||
_TESTS = [{
|
||||
# https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/
|
||||
'url': 'arcpublishing:adn:8c99cb6e-b29c-4bc9-9173-7bf9979225ab',
|
||||
@@ -74,12 +75,12 @@ class ArcPublishingIE(InfoExtractor):
|
||||
def _extract_embed_urls(cls, url, webpage):
|
||||
entries = []
|
||||
# https://arcpublishing.atlassian.net/wiki/spaces/POWA/overview
|
||||
for powa_el in re.findall(r'(<div[^>]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="%s"[^>]*>)' % ArcPublishingIE._UUID_REGEX, webpage):
|
||||
for powa_el in re.findall(rf'(<div[^>]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="{ArcPublishingIE._UUID_REGEX}"[^>]*>)', webpage):
|
||||
powa = extract_attributes(powa_el) or {}
|
||||
org = powa.get('data-org')
|
||||
uuid = powa.get('data-uuid')
|
||||
if org and uuid:
|
||||
entries.append('arcpublishing:%s:%s' % (org, uuid))
|
||||
entries.append(f'arcpublishing:{org}:{uuid}')
|
||||
return entries
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -122,7 +123,7 @@ class ArcPublishingIE(InfoExtractor):
|
||||
elif stream_type in ('ts', 'hls'):
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
s_url, uuid, 'mp4', live=is_live, m3u8_id='hls', fatal=False)
|
||||
if all([f.get('acodec') == 'none' for f in m3u8_formats]):
|
||||
if all(f.get('acodec') == 'none' for f in m3u8_formats):
|
||||
continue
|
||||
for f in m3u8_formats:
|
||||
height = f.get('height')
|
||||
@@ -136,7 +137,7 @@ class ArcPublishingIE(InfoExtractor):
|
||||
else:
|
||||
vbr = int_or_none(s.get('bitrate'))
|
||||
formats.append({
|
||||
'format_id': '%s-%d' % (stream_type, vbr) if vbr else stream_type,
|
||||
'format_id': join_nonempty(stream_type, vbr),
|
||||
'vbr': vbr,
|
||||
'width': int_or_none(s.get('width')),
|
||||
'height': int_or_none(s.get('height')),
|
||||
|
||||
@@ -85,7 +85,7 @@ class ARDMediathekBaseIE(InfoExtractor):
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
update_url_query(stream_url, {
|
||||
'hdcore': '3.1.1',
|
||||
'plugin': 'aasp-3.1.1.69.124'
|
||||
'plugin': 'aasp-3.1.1.69.124',
|
||||
}), video_id, f4m_id='hds', fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
@@ -96,12 +96,12 @@ class ARDMediathekBaseIE(InfoExtractor):
|
||||
f = {
|
||||
'url': server,
|
||||
'play_path': stream_url,
|
||||
'format_id': 'a%s-rtmp-%s' % (num, quality),
|
||||
'format_id': f'a{num}-rtmp-{quality}',
|
||||
}
|
||||
else:
|
||||
f = {
|
||||
'url': stream_url,
|
||||
'format_id': 'a%s-%s-%s' % (num, ext, quality)
|
||||
'format_id': f'a{num}-{ext}-{quality}',
|
||||
}
|
||||
m = re.search(
|
||||
r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$',
|
||||
|
||||
@@ -64,7 +64,7 @@ class ArkenaIE(InfoExtractor):
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
|
||||
media = self._download_json(
|
||||
'https://video.qbrick.com/api/v1/public/accounts/%s/medias/%s' % (account_id, video_id),
|
||||
f'https://video.qbrick.com/api/v1/public/accounts/{account_id}/medias/{video_id}',
|
||||
video_id, query={
|
||||
# https://video.qbrick.com/docs/api/examples/library-api.html
|
||||
'fields': 'asset/resources/*/renditions/*(height,id,language,links/*(href,mimeType),type,size,videos/*(audios/*(codec,sampleRate),bitrate,codec,duration,height,width),width),created,metadata/*(title,description),tags',
|
||||
@@ -131,8 +131,8 @@ class ArkenaIE(InfoExtractor):
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
href, video_id, f4m_id='hds', fatal=False))
|
||||
elif mime_type == 'application/dash+xml':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
href, video_id, f4m_id='hds', fatal=False))
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
href, video_id, mpd_id='dash', fatal=False))
|
||||
elif mime_type == 'application/vnd.ms-sstr+xml':
|
||||
formats.extend(self._extract_ism_formats(
|
||||
href, video_id, ism_id='mss', fatal=False))
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
format_field,
|
||||
@@ -35,7 +33,7 @@ class ArnesIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'tags': ['linearna_algebra'],
|
||||
'start_time': 10,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4',
|
||||
'only_matching': True,
|
||||
@@ -93,6 +91,6 @@ class ArnesIE(InfoExtractor):
|
||||
'duration': float_or_none(video.get('duration'), 1000),
|
||||
'view_count': int_or_none(video.get('views')),
|
||||
'tags': video.get('hashtags'),
|
||||
'start_time': int_or_none(compat_parse_qs(
|
||||
compat_urllib_parse_urlparse(url).query).get('t', [None])[0]),
|
||||
'start_time': int_or_none(urllib.parse.parse_qs(
|
||||
urllib.parse.urlparse(url).query).get('t', [None])[0]),
|
||||
}
|
||||
|
||||
@@ -153,7 +153,7 @@ class Art19IE(InfoExtractor):
|
||||
'series_id': ('series_id', {str}),
|
||||
'timestamp': ('created_at', {parse_iso8601}),
|
||||
'release_timestamp': ('released_at', {parse_iso8601}),
|
||||
'modified_timestamp': ('updated_at', {parse_iso8601})
|
||||
'modified_timestamp': ('updated_at', {parse_iso8601}),
|
||||
})),
|
||||
**traverse_obj(rss_metadata, ('content', {
|
||||
'title': ('episode_title', {str}),
|
||||
|
||||
@@ -20,15 +20,15 @@ class ArteTVBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class ArteTVIE(ArteTVBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
_VALID_URL = rf'''(?x)
|
||||
(?:https?://
|
||||
(?:
|
||||
(?:www\.)?arte\.tv/(?P<lang>%(langs)s)/videos|
|
||||
api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>%(langs)s)
|
||||
(?:www\.)?arte\.tv/(?P<lang>{ArteTVBaseIE._ARTE_LANGUAGES})/videos|
|
||||
api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>{ArteTVBaseIE._ARTE_LANGUAGES})
|
||||
)
|
||||
|arte://program)
|
||||
/(?P<id>\d{6}-\d{3}-[AF]|LIVE)
|
||||
''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES}
|
||||
/(?P<id>\d{{6}}-\d{{3}}-[AF]|LIVE)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
|
||||
'only_matching': True,
|
||||
@@ -145,7 +145,7 @@ class ArteTVIE(ArteTVBaseIE):
|
||||
language_code = self._LANG_MAP.get(lang)
|
||||
|
||||
config = self._download_json(f'{self._API_BASE}/config/{lang}/{video_id}', video_id, headers={
|
||||
'x-validated-age': '18'
|
||||
'x-validated-age': '18',
|
||||
})
|
||||
|
||||
geoblocking = traverse_obj(config, ('data', 'attributes', 'restriction', 'geoblocking')) or {}
|
||||
@@ -247,7 +247,7 @@ class ArteTVEmbedIE(InfoExtractor):
|
||||
'description': 'md5:be40b667f45189632b78c1425c7c2ce1',
|
||||
'upload_date': '20201116',
|
||||
},
|
||||
'skip': 'No video available'
|
||||
'skip': 'No video available',
|
||||
}, {
|
||||
'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A',
|
||||
'only_matching': True,
|
||||
@@ -262,7 +262,7 @@ class ArteTVEmbedIE(InfoExtractor):
|
||||
|
||||
|
||||
class ArteTVPlaylistIE(ArteTVBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>RC-\d{6})' % ArteTVBaseIE._ARTE_LANGUAGES
|
||||
_VALID_URL = rf'https?://(?:www\.)?arte\.tv/(?P<lang>{ArteTVBaseIE._ARTE_LANGUAGES})/videos/(?P<id>RC-\d{{6}})'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
|
||||
'only_matching': True,
|
||||
@@ -298,7 +298,7 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
|
||||
|
||||
|
||||
class ArteTVCategoryIE(ArteTVBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$' % ArteTVBaseIE._ARTE_LANGUAGES
|
||||
_VALID_URL = rf'https?://(?:www\.)?arte\.tv/(?P<lang>{ArteTVBaseIE._ARTE_LANGUAGES})/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.arte.tv/en/videos/politics-and-society/',
|
||||
'info_dict': {
|
||||
@@ -312,7 +312,7 @@ class ArteTVCategoryIE(ArteTVBaseIE):
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return (
|
||||
not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE, ))
|
||||
not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE))
|
||||
and super().suitable(url))
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -321,12 +321,12 @@ class ArteTVCategoryIE(ArteTVBaseIE):
|
||||
|
||||
items = []
|
||||
for video in re.finditer(
|
||||
r'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/%s/videos/[\w/-]+)(?P=q)' % lang,
|
||||
rf'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/{lang}/videos/[\w/-]+)(?P=q)',
|
||||
webpage):
|
||||
video = video.group('url')
|
||||
if video == url:
|
||||
continue
|
||||
if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE, )):
|
||||
if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE)):
|
||||
items.append(video)
|
||||
|
||||
title = strip_or_none(self._generic_title('', webpage, default='').rsplit('|', 1)[0]) or None
|
||||
|
||||
@@ -20,7 +20,7 @@ class AtresPlayerIE(InfoExtractor):
|
||||
'description': 'md5:7634cdcb4d50d5381bedf93efb537fbc',
|
||||
'duration': 3413,
|
||||
},
|
||||
'skip': 'This video is only available for registered users'
|
||||
'skip': 'This video is only available for registered users',
|
||||
},
|
||||
{
|
||||
'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/',
|
||||
@@ -33,14 +33,6 @@ class AtresPlayerIE(InfoExtractor):
|
||||
]
|
||||
_API_BASE = 'https://api.atresplayer.com/'
|
||||
|
||||
def _handle_error(self, e, code):
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == code:
|
||||
error = self._parse_json(e.cause.response.read(), None)
|
||||
if error.get('error') == 'required_registered':
|
||||
self.raise_login_required()
|
||||
raise ExtractorError(error['error_description'], expected=True)
|
||||
raise
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
self._request_webpage(
|
||||
self._API_BASE + 'login', None, 'Downloading login page')
|
||||
@@ -49,13 +41,15 @@ class AtresPlayerIE(InfoExtractor):
|
||||
target_url = self._download_json(
|
||||
'https://account.atresmedia.com/api/login', None,
|
||||
'Logging in', headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded'
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
}, data=urlencode_postdata({
|
||||
'username': username,
|
||||
'password': password,
|
||||
}))['targetUrl']
|
||||
except ExtractorError as e:
|
||||
self._handle_error(e, 400)
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 400:
|
||||
raise ExtractorError('Invalid username and/or password', expected=True)
|
||||
raise
|
||||
|
||||
self._request_webpage(target_url, None, 'Following Target URL')
|
||||
|
||||
@@ -66,7 +60,12 @@ class AtresPlayerIE(InfoExtractor):
|
||||
episode = self._download_json(
|
||||
self._API_BASE + 'client/v1/player/episode/' + video_id, video_id)
|
||||
except ExtractorError as e:
|
||||
self._handle_error(e, 403)
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
|
||||
error = self._parse_json(e.cause.response.read(), None)
|
||||
if error.get('error') == 'required_registered':
|
||||
self.raise_login_required()
|
||||
raise ExtractorError(error['error_description'], expected=True)
|
||||
raise
|
||||
|
||||
title = episode['titulo']
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ class AtScaleConfEventIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'data-scale-spring-2022',
|
||||
'title': 'Data @Scale Spring 2022',
|
||||
'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55'
|
||||
'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://atscaleconference.com/events/video-scale-2021/',
|
||||
@@ -20,15 +20,15 @@ class AtScaleConfEventIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'video-scale-2021',
|
||||
'title': 'Video @Scale 2021',
|
||||
'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55'
|
||||
'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, id)
|
||||
playlist_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
return self.playlist_from_matches(
|
||||
re.findall(r'data-url\s*=\s*"(https?://(?:www\.)?atscaleconference\.com/videos/[^"]+)"', webpage),
|
||||
ie='Generic', playlist_id=id,
|
||||
ie='Generic', playlist_id=playlist_id,
|
||||
title=self._og_search_title(webpage), description=self._og_search_description(webpage))
|
||||
|
||||
@@ -19,7 +19,7 @@ class ATVAtIE(InfoExtractor):
|
||||
'id': 'v-ce9cgn1e70n5-1',
|
||||
'ext': 'mp4',
|
||||
'title': 'Bauer sucht Frau - Staffel 18 Folge 3 - Die Hofwochen',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/episode-01/bauer-sucht-frau-staffel-18-vorstellungsfolge-1',
|
||||
'only_matching': True,
|
||||
@@ -66,10 +66,10 @@ class ATVAtIE(InfoExtractor):
|
||||
video_id=video_id)
|
||||
|
||||
video_title = json_data['views']['default']['page']['title']
|
||||
contentResource = json_data['views']['default']['page']['contentResource']
|
||||
content_id = contentResource[0]['id']
|
||||
content_ids = [{'id': id, 'subclip_start': content['start'], 'subclip_end': content['end']}
|
||||
for id, content in enumerate(contentResource)]
|
||||
content_resource = json_data['views']['default']['page']['contentResource']
|
||||
content_id = content_resource[0]['id']
|
||||
content_ids = [{'id': id_, 'subclip_start': content['start'], 'subclip_end': content['end']}
|
||||
for id_, content in enumerate(content_resource)]
|
||||
|
||||
time_of_request = dt.datetime.now()
|
||||
not_before = time_of_request - dt.timedelta(minutes=5)
|
||||
@@ -87,17 +87,17 @@ class ATVAtIE(InfoExtractor):
|
||||
videos = self._download_json(
|
||||
'https://vas-v4.p7s1video.net/4.0/getsources',
|
||||
content_id, 'Downloading videos JSON', query={
|
||||
'token': jwt_token.decode('utf-8')
|
||||
'token': jwt_token.decode('utf-8'),
|
||||
})
|
||||
|
||||
video_id, videos_data = list(videos['data'].items())[0]
|
||||
video_id, videos_data = next(iter(videos['data'].items()))
|
||||
error_msg = try_get(videos_data, lambda x: x['error']['title'])
|
||||
if error_msg == 'Geo check failed':
|
||||
self.raise_geo_restricted(error_msg)
|
||||
elif error_msg:
|
||||
raise ExtractorError(error_msg)
|
||||
entries = [
|
||||
self._extract_video_info(url, contentResource[video['id']], video)
|
||||
self._extract_video_info(url, content_resource[video['id']], video)
|
||||
for video in videos_data]
|
||||
|
||||
return {
|
||||
|
||||
@@ -19,7 +19,7 @@ class AudiMediaIE(InfoExtractor):
|
||||
'timestamp': 1448354940,
|
||||
'duration': 74022,
|
||||
'view_count': int,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.audi-mediacenter.com/en/audimediatv/video/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-2991',
|
||||
'only_matching': True,
|
||||
@@ -73,7 +73,7 @@ class AudiMediaIE(InfoExtractor):
|
||||
bitrate = self._search_regex(r'(\d+)k', video_version_url, 'bitrate', default=None)
|
||||
if bitrate:
|
||||
f.update({
|
||||
'format_id': 'http-%s' % bitrate,
|
||||
'format_id': f'http-{bitrate}',
|
||||
})
|
||||
formats.append(f)
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ class AudioBoomIE(InfoExtractor):
|
||||
'duration': 4000.99,
|
||||
'uploader': 'Sue Perkins: An hour or so with...',
|
||||
'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/perkins',
|
||||
}
|
||||
},
|
||||
}, { # Direct mp3-file link
|
||||
'url': 'https://audioboom.com/posts/8128496.mp3',
|
||||
'md5': 'e329edf304d450def95c7f86a9165ee1',
|
||||
@@ -27,7 +27,7 @@ class AudioBoomIE(InfoExtractor):
|
||||
'duration': 1689.7,
|
||||
'uploader': 'Lost Dot Podcast: The Trans Pyrenees and Transcontinental Race',
|
||||
'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channels/5003904',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0',
|
||||
'only_matching': True,
|
||||
|
||||
@@ -9,7 +9,7 @@ class AudiodraftBaseIE(InfoExtractor):
|
||||
headers={
|
||||
'Content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
}, data=f'id={player_entry_id}'.encode('utf-8'))
|
||||
}, data=f'id={player_entry_id}'.encode())
|
||||
|
||||
return {
|
||||
'id': str(data_json['entry_id']),
|
||||
@@ -65,9 +65,10 @@ class AudiodraftCustomIE(AudiodraftBaseIE):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, id)
|
||||
player_entry_id = self._search_regex(r'playAudio\(\'(player_entry_\d+)\'\);', webpage, id, 'play entry id')
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
player_entry_id = self._search_regex(
|
||||
r'playAudio\(\'(player_entry_\d+)\'\);', webpage, video_id, 'play entry id')
|
||||
return self._audiodraft_extract_from_id(player_entry_id)
|
||||
|
||||
|
||||
@@ -89,5 +90,5 @@ class AudiodraftGenericIE(AudiodraftBaseIE):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = self._match_id(url)
|
||||
return self._audiodraft_extract_from_id(f'player_entry_{id}')
|
||||
video_id = self._match_id(url)
|
||||
return self._audiodraft_extract_from_id(f'player_entry_{video_id}')
|
||||
|
||||
@@ -3,7 +3,6 @@ import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .soundcloud import SoundcloudIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
url_basename,
|
||||
@@ -22,8 +21,8 @@ class AudiomackIE(InfoExtractor):
|
||||
'id': '310086',
|
||||
'ext': 'mp3',
|
||||
'uploader': 'Roosh Williams',
|
||||
'title': 'Extraordinary'
|
||||
}
|
||||
'title': 'Extraordinary',
|
||||
},
|
||||
},
|
||||
# audiomack wrapper around soundcloud song
|
||||
# Needs new test URL.
|
||||
@@ -56,7 +55,7 @@ class AudiomackIE(InfoExtractor):
|
||||
|
||||
# API is inconsistent with errors
|
||||
if 'url' not in api_response or not api_response['url'] or 'error' in api_response:
|
||||
raise ExtractorError('Invalid url %s' % url)
|
||||
raise ExtractorError(f'Invalid url {url}')
|
||||
|
||||
# Audiomack wraps a lot of soundcloud tracks in their branded wrapper
|
||||
# if so, pass the work off to the soundcloud extractor
|
||||
@@ -64,7 +63,7 @@ class AudiomackIE(InfoExtractor):
|
||||
return self.url_result(api_response['url'], SoundcloudIE.ie_key())
|
||||
|
||||
return {
|
||||
'id': compat_str(api_response.get('id', album_url_tag)),
|
||||
'id': str(api_response.get('id', album_url_tag)),
|
||||
'uploader': api_response.get('artist'),
|
||||
'title': api_response.get('title'),
|
||||
'url': api_response['url'],
|
||||
@@ -82,8 +81,8 @@ class AudiomackAlbumIE(InfoExtractor):
|
||||
'info_dict':
|
||||
{
|
||||
'id': '812251',
|
||||
'title': 'Tha Tour: Part 2 (Official Mixtape)'
|
||||
}
|
||||
'title': 'Tha Tour: Part 2 (Official Mixtape)',
|
||||
},
|
||||
},
|
||||
# Album playlist ripped from fakeshoredrive with no metadata
|
||||
{
|
||||
@@ -98,16 +97,16 @@ class AudiomackAlbumIE(InfoExtractor):
|
||||
'id': '837576',
|
||||
'ext': 'mp3',
|
||||
'uploader': 'Lil Herb a.k.a. G Herbo',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'title': 'PPP (Pistol P Project) - 10. 4 Minutes Of Hell Part 4 (prod by DY OF 808 MAFIA)',
|
||||
'id': '837580',
|
||||
'ext': 'mp3',
|
||||
'uploader': 'Lil Herb a.k.a. G Herbo',
|
||||
}
|
||||
},
|
||||
}],
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -123,12 +122,12 @@ class AudiomackAlbumIE(InfoExtractor):
|
||||
api_response = self._download_json(
|
||||
'http://www.audiomack.com/api/music/url/album/%s/%d?extended=1&_=%d'
|
||||
% (album_url_tag, track_no, time.time()), album_url_tag,
|
||||
note='Querying song information (%d)' % (track_no + 1))
|
||||
note=f'Querying song information ({track_no + 1})')
|
||||
|
||||
# Total failure, only occurs when url is totally wrong
|
||||
# Won't happen in middle of valid playlist (next case)
|
||||
if 'url' not in api_response or 'error' in api_response:
|
||||
raise ExtractorError('Invalid url for track %d of album url %s' % (track_no, url))
|
||||
raise ExtractorError(f'Invalid url for track {track_no} of album url {url}')
|
||||
# URL is good but song id doesn't exist - usually means end of playlist
|
||||
elif not api_response['url']:
|
||||
break
|
||||
@@ -136,10 +135,10 @@ class AudiomackAlbumIE(InfoExtractor):
|
||||
# Pull out the album metadata and add to result (if it exists)
|
||||
for resultkey, apikey in [('id', 'album_id'), ('title', 'album_title')]:
|
||||
if apikey in api_response and resultkey not in result:
|
||||
result[resultkey] = compat_str(api_response[apikey])
|
||||
result[resultkey] = str(api_response[apikey])
|
||||
song_id = url_basename(api_response['url']).rpartition('.')[0]
|
||||
result['entries'].append({
|
||||
'id': compat_str(api_response.get('id', song_id)),
|
||||
'id': str(api_response.get('id', song_id)),
|
||||
'uploader': api_response.get('artist'),
|
||||
'title': api_response.get('title', song_id),
|
||||
'url': api_response['url'],
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import random
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str, compat_urllib_parse_unquote
|
||||
from ..utils import ExtractorError, str_or_none, try_get
|
||||
|
||||
|
||||
@@ -15,13 +15,13 @@ class AudiusBaseIE(InfoExtractor):
|
||||
if response_data is not None:
|
||||
return response_data
|
||||
if len(response) == 1 and 'message' in response:
|
||||
raise ExtractorError('API error: %s' % response['message'],
|
||||
raise ExtractorError('API error: {}'.format(response['message']),
|
||||
expected=True)
|
||||
raise ExtractorError('Unexpected API response')
|
||||
|
||||
def _select_api_base(self):
|
||||
"""Selecting one of the currently available API hosts"""
|
||||
response = super(AudiusBaseIE, self)._download_json(
|
||||
response = super()._download_json(
|
||||
'https://api.audius.co/', None,
|
||||
note='Requesting available API hosts',
|
||||
errnote='Unable to request available API hosts')
|
||||
@@ -41,8 +41,8 @@ class AudiusBaseIE(InfoExtractor):
|
||||
anything from this link, since the Audius API won't be able to resolve
|
||||
this url
|
||||
"""
|
||||
url = compat_urllib_parse_unquote(url)
|
||||
title = compat_urllib_parse_unquote(title)
|
||||
url = urllib.parse.unquote(url)
|
||||
title = urllib.parse.unquote(title)
|
||||
if '/' in title or '%2F' in title:
|
||||
fixed_title = title.replace('/', '%5C').replace('%2F', '%5C')
|
||||
return url.replace(title, fixed_title)
|
||||
@@ -54,19 +54,19 @@ class AudiusBaseIE(InfoExtractor):
|
||||
if self._API_BASE is None:
|
||||
self._select_api_base()
|
||||
try:
|
||||
response = super(AudiusBaseIE, self)._download_json(
|
||||
'%s%s%s' % (self._API_BASE, self._API_V, path), item_id, note=note,
|
||||
response = super()._download_json(
|
||||
f'{self._API_BASE}{self._API_V}{path}', item_id, note=note,
|
||||
errnote=errnote, expected_status=expected_status)
|
||||
except ExtractorError as exc:
|
||||
# some of Audius API hosts may not work as expected and return HTML
|
||||
if 'Failed to parse JSON' in compat_str(exc):
|
||||
if 'Failed to parse JSON' in str(exc):
|
||||
raise ExtractorError('An error occurred while receiving data. Try again',
|
||||
expected=True)
|
||||
raise exc
|
||||
return self._get_response_data(response)
|
||||
|
||||
def _resolve_url(self, url, item_id):
|
||||
return self._api_request('/resolve?url=%s' % url, item_id,
|
||||
return self._api_request(f'/resolve?url={url}', item_id,
|
||||
expected_status=404)
|
||||
|
||||
|
||||
@@ -91,7 +91,7 @@ class AudiusIE(AudiusBaseIE):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
# Regular track
|
||||
@@ -109,14 +109,14 @@ class AudiusIE(AudiusBaseIE):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
}
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
_ARTWORK_MAP = {
|
||||
"150x150": 150,
|
||||
"480x480": 480,
|
||||
"1000x1000": 1000
|
||||
'150x150': 150,
|
||||
'480x480': 480,
|
||||
'1000x1000': 1000,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -130,7 +130,7 @@ class AudiusIE(AudiusBaseIE):
|
||||
else: # API link
|
||||
title = None
|
||||
# uploader = None
|
||||
track_data = self._api_request('/tracks/%s' % track_id, track_id)
|
||||
track_data = self._api_request(f'/tracks/{track_id}', track_id)
|
||||
|
||||
if not isinstance(track_data, dict):
|
||||
raise ExtractorError('Unexpected API response')
|
||||
@@ -144,7 +144,7 @@ class AudiusIE(AudiusBaseIE):
|
||||
if isinstance(artworks_data, dict):
|
||||
for quality_key, thumbnail_url in artworks_data.items():
|
||||
thumbnail = {
|
||||
"url": thumbnail_url
|
||||
'url': thumbnail_url,
|
||||
}
|
||||
quality_code = self._ARTWORK_MAP.get(quality_key)
|
||||
if quality_code is not None:
|
||||
@@ -154,12 +154,12 @@ class AudiusIE(AudiusBaseIE):
|
||||
return {
|
||||
'id': track_id,
|
||||
'title': track_data.get('title', title),
|
||||
'url': '%s/v1/tracks/%s/stream' % (self._API_BASE, track_id),
|
||||
'url': f'{self._API_BASE}/v1/tracks/{track_id}/stream',
|
||||
'ext': 'mp3',
|
||||
'description': track_data.get('description'),
|
||||
'duration': track_data.get('duration'),
|
||||
'track': track_data.get('title'),
|
||||
'artist': try_get(track_data, lambda x: x['user']['name'], compat_str),
|
||||
'artist': try_get(track_data, lambda x: x['user']['name'], str),
|
||||
'genre': track_data.get('genre'),
|
||||
'thumbnails': thumbnails,
|
||||
'view_count': track_data.get('play_count'),
|
||||
@@ -175,11 +175,11 @@ class AudiusTrackIE(AudiusIE): # XXX: Do not subclass from concrete IE
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'audius:9RWlo',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'audius:http://discoveryprovider.audius.prod-us-west-2.staked.cloud/v1/tracks/9RWlo',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
@@ -207,7 +207,7 @@ class AudiusPlaylistIE(AudiusBaseIE):
|
||||
if not track_id:
|
||||
raise ExtractorError('Unable to get track ID from playlist')
|
||||
entries.append(self.url_result(
|
||||
'audius:%s' % track_id,
|
||||
f'audius:{track_id}',
|
||||
ie=AudiusTrackIE.ie_key(), video_id=track_id))
|
||||
return entries
|
||||
|
||||
@@ -231,7 +231,7 @@ class AudiusPlaylistIE(AudiusBaseIE):
|
||||
raise ExtractorError('Unable to get playlist ID')
|
||||
|
||||
playlist_tracks = self._api_request(
|
||||
'/playlists/%s/tracks' % playlist_id,
|
||||
f'/playlists/{playlist_id}/tracks',
|
||||
title, note='Downloading playlist tracks metadata',
|
||||
errnote='Unable to download playlist tracks metadata')
|
||||
if not isinstance(playlist_tracks, list):
|
||||
@@ -267,5 +267,5 @@ class AudiusProfileIE(AudiusPlaylistIE): # XXX: Do not subclass from concrete I
|
||||
profile_audius_id = _profile_data[0]['id']
|
||||
profile_bio = _profile_data[0].get('bio')
|
||||
|
||||
api_call = self._api_request('/full/users/handle/%s/tracks' % profile_id, profile_id)
|
||||
api_call = self._api_request(f'/full/users/handle/{profile_id}/tracks', profile_id)
|
||||
return self.playlist_result(self._build_playlist(api_call), profile_audius_id, profile_id, profile_bio)
|
||||
|
||||
@@ -1,10 +1,7 @@
|
||||
import base64
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_urlencode,
|
||||
)
|
||||
from ..utils import (
|
||||
format_field,
|
||||
int_or_none,
|
||||
@@ -22,14 +19,14 @@ class AWAANIE(InfoExtractor):
|
||||
show_id, video_id, season_id = self._match_valid_url(url).groups()
|
||||
if video_id and int(video_id) > 0:
|
||||
return self.url_result(
|
||||
'http://awaan.ae/media/%s' % video_id, 'AWAANVideo')
|
||||
f'http://awaan.ae/media/{video_id}', 'AWAANVideo')
|
||||
elif season_id and int(season_id) > 0:
|
||||
return self.url_result(smuggle_url(
|
||||
'http://awaan.ae/program/season/%s' % season_id,
|
||||
f'http://awaan.ae/program/season/{season_id}',
|
||||
{'show_id': show_id}), 'AWAANSeason')
|
||||
else:
|
||||
return self.url_result(
|
||||
'http://awaan.ae/program/%s' % show_id, 'AWAANSeason')
|
||||
f'http://awaan.ae/program/{show_id}', 'AWAANSeason')
|
||||
|
||||
|
||||
class AWAANBaseIE(InfoExtractor):
|
||||
@@ -75,11 +72,11 @@ class AWAANVideoIE(AWAANBaseIE):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video_data = self._download_json(
|
||||
'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id,
|
||||
f'http://admin.mangomolo.com/analytics/index.php/plus/video?id={video_id}',
|
||||
video_id, headers={'Origin': 'http://awaan.ae'})
|
||||
info = self._parse_video_data(video_data, video_id, False)
|
||||
|
||||
embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + compat_urllib_parse_urlencode({
|
||||
embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + urllib.parse.urlencode({
|
||||
'id': video_data['id'],
|
||||
'user_id': video_data['user_id'],
|
||||
'signature': video_data['signature'],
|
||||
@@ -117,11 +114,11 @@ class AWAANLiveIE(AWAANBaseIE):
|
||||
channel_id = self._match_id(url)
|
||||
|
||||
channel_data = self._download_json(
|
||||
'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id=%s' % channel_id,
|
||||
f'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id={channel_id}',
|
||||
channel_id, headers={'Origin': 'http://awaan.ae'})
|
||||
info = self._parse_video_data(channel_data, channel_id, True)
|
||||
|
||||
embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + compat_urllib_parse_urlencode({
|
||||
embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + urllib.parse.urlencode({
|
||||
'id': base64.b64encode(channel_data['user_id'].encode()).decode(),
|
||||
'channelid': base64.b64encode(channel_data['id'].encode()).decode(),
|
||||
'signature': channel_data['signature'],
|
||||
@@ -159,7 +156,7 @@ class AWAANSeasonIE(InfoExtractor):
|
||||
show_id = smuggled_data.get('show_id')
|
||||
if show_id is None:
|
||||
season = self._download_json(
|
||||
'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id=%s' % season_id,
|
||||
f'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id={season_id}',
|
||||
season_id, headers={'Origin': 'http://awaan.ae'})
|
||||
show_id = season['id']
|
||||
data['show_id'] = show_id
|
||||
@@ -167,7 +164,7 @@ class AWAANSeasonIE(InfoExtractor):
|
||||
'http://admin.mangomolo.com/analytics/index.php/plus/show',
|
||||
show_id, data=urlencode_postdata(data), headers={
|
||||
'Origin': 'http://awaan.ae',
|
||||
'Content-Type': 'application/x-www-form-urlencoded'
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
})
|
||||
if not season_id:
|
||||
season_id = show['default_season']
|
||||
@@ -177,8 +174,8 @@ class AWAANSeasonIE(InfoExtractor):
|
||||
|
||||
entries = []
|
||||
for video in show['videos']:
|
||||
video_id = compat_str(video['id'])
|
||||
video_id = str(video['id'])
|
||||
entries.append(self.url_result(
|
||||
'http://awaan.ae/media/%s' % video_id, 'AWAANVideo', video_id))
|
||||
f'http://awaan.ae/media/{video_id}', 'AWAANVideo', video_id))
|
||||
|
||||
return self.playlist_result(entries, season_id, title)
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
import datetime as dt
|
||||
import hashlib
|
||||
import hmac
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlencode
|
||||
|
||||
|
||||
class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
|
||||
@@ -18,20 +18,20 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
|
||||
'Accept': 'application/json',
|
||||
'Host': self._AWS_PROXY_HOST,
|
||||
'X-Amz-Date': amz_date,
|
||||
'X-Api-Key': self._AWS_API_KEY
|
||||
'X-Api-Key': self._AWS_API_KEY,
|
||||
}
|
||||
session_token = aws_dict.get('session_token')
|
||||
if session_token:
|
||||
headers['X-Amz-Security-Token'] = session_token
|
||||
|
||||
def aws_hash(s):
|
||||
return hashlib.sha256(s.encode('utf-8')).hexdigest()
|
||||
return hashlib.sha256(s.encode()).hexdigest()
|
||||
|
||||
# Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
|
||||
canonical_querystring = compat_urllib_parse_urlencode(query)
|
||||
canonical_querystring = urllib.parse.urlencode(query)
|
||||
canonical_headers = ''
|
||||
for header_name, header_value in sorted(headers.items()):
|
||||
canonical_headers += '%s:%s\n' % (header_name.lower(), header_value)
|
||||
canonical_headers += f'{header_name.lower()}:{header_value}\n'
|
||||
signed_headers = ';'.join([header.lower() for header in sorted(headers.keys())])
|
||||
canonical_request = '\n'.join([
|
||||
'GET',
|
||||
@@ -39,7 +39,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
|
||||
canonical_querystring,
|
||||
canonical_headers,
|
||||
signed_headers,
|
||||
aws_hash('')
|
||||
aws_hash(''),
|
||||
])
|
||||
|
||||
# Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html
|
||||
@@ -49,7 +49,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
|
||||
|
||||
# Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html
|
||||
def aws_hmac(key, msg):
|
||||
return hmac.new(key, msg.encode('utf-8'), hashlib.sha256)
|
||||
return hmac.new(key, msg.encode(), hashlib.sha256)
|
||||
|
||||
def aws_hmac_digest(key, msg):
|
||||
return aws_hmac(key, msg).digest()
|
||||
@@ -57,7 +57,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
|
||||
def aws_hmac_hexdigest(key, msg):
|
||||
return aws_hmac(key, msg).hexdigest()
|
||||
|
||||
k_signing = ('AWS4' + aws_dict['secret_key']).encode('utf-8')
|
||||
k_signing = ('AWS4' + aws_dict['secret_key']).encode()
|
||||
for value in credential_scope_list:
|
||||
k_signing = aws_hmac_digest(k_signing, value)
|
||||
|
||||
@@ -65,11 +65,11 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
|
||||
|
||||
# Task 4: http://docs.aws.amazon.com/general/latest/gr/sigv4-add-signature-to-request.html
|
||||
headers['Authorization'] = ', '.join([
|
||||
'%s Credential=%s/%s' % (self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope),
|
||||
'SignedHeaders=%s' % signed_headers,
|
||||
'Signature=%s' % signature,
|
||||
'{} Credential={}/{}'.format(self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope),
|
||||
f'SignedHeaders={signed_headers}',
|
||||
f'Signature={signature}',
|
||||
])
|
||||
|
||||
return self._download_json(
|
||||
'https://%s%s%s' % (self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''),
|
||||
'https://{}{}{}'.format(self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''),
|
||||
video_id, headers=headers)
|
||||
|
||||
@@ -38,14 +38,14 @@ class AZMedienIE(InfoExtractor):
|
||||
'timestamp': 1538328802,
|
||||
'view_count': int,
|
||||
'thumbnail': 'http://cfvod.kaltura.com/p/1719221/sp/171922100/thumbnail/entry_id/1_anruz3wy/version/100031',
|
||||
'duration': 1930
|
||||
'duration': 1930,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}]
|
||||
_API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/a4016f65fe62b81dc6664dd9f4910e4ab40383be'
|
||||
_PARTNER_ID = '1719221'
|
||||
@@ -62,5 +62,5 @@ class AZMedienIE(InfoExtractor):
|
||||
})['data']['context']['mainAsset']['video']['kaltura']['kalturaId']
|
||||
|
||||
return self.url_result(
|
||||
'kaltura:%s:%s' % (self._PARTNER_ID, entry_id),
|
||||
f'kaltura:{self._PARTNER_ID}:{entry_id}',
|
||||
ie=KalturaIE.ie_key(), video_id=entry_id)
|
||||
|
||||
@@ -24,8 +24,9 @@ class BaiduVideoIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _call_api(self, path, category, playlist_id, note):
|
||||
return self._download_json('http://app.video.baidu.com/%s/?worktype=adnative%s&id=%s' % (
|
||||
path, category, playlist_id), playlist_id, note)
|
||||
return self._download_json(
|
||||
f'http://app.video.baidu.com/{path}/?worktype=adnative{category}&id={playlist_id}',
|
||||
playlist_id, note)
|
||||
|
||||
def _real_extract(self, url):
|
||||
category, playlist_id = self._match_valid_url(url).groups()
|
||||
@@ -44,7 +45,7 @@ class BaiduVideoIE(InfoExtractor):
|
||||
'xqsingle', category, playlist_id, 'Download episodes JSON metadata')
|
||||
|
||||
entries = [self.url_result(
|
||||
episode['url'], video_title=episode['title']
|
||||
episode['url'], video_title=episode['title'],
|
||||
) for episode in episodes_detail['videos']]
|
||||
|
||||
return self.playlist_result(
|
||||
|
||||
@@ -1,15 +1,16 @@
|
||||
import math
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
InAdvancePagedList,
|
||||
determine_ext,
|
||||
format_field,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -20,8 +21,8 @@ class BanByeBaseIE(InfoExtractor):
|
||||
|
||||
@staticmethod
|
||||
def _extract_playlist_id(url, param='playlist'):
|
||||
return compat_parse_qs(
|
||||
compat_urllib_parse_urlparse(url).query).get(param, [None])[0]
|
||||
return urllib.parse.parse_qs(
|
||||
urllib.parse.urlparse(url).query).get(param, [None])[0]
|
||||
|
||||
def _extract_playlist(self, playlist_id):
|
||||
data = self._download_json(f'{self._API_BASE}/playlists/{playlist_id}', playlist_id)
|
||||
@@ -33,6 +34,7 @@ class BanByeBaseIE(InfoExtractor):
|
||||
class BanByeIE(BanByeBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?banbye\.com/(?:en/)?watch/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
# ['src']['mp4']['levels'] direct mp4 urls only
|
||||
'url': 'https://banbye.com/watch/v_ytfmvkVYLE8T',
|
||||
'md5': '2f4ea15c5ca259a73d909b2cfd558eb5',
|
||||
'info_dict': {
|
||||
@@ -61,6 +63,7 @@ class BanByeIE(BanByeBaseIE):
|
||||
},
|
||||
'playlist_mincount': 9,
|
||||
}, {
|
||||
# ['src']['mp4']['levels'] direct mp4 urls only
|
||||
'url': 'https://banbye.com/watch/v_kb6_o1Kyq-CD',
|
||||
'info_dict': {
|
||||
'id': 'v_kb6_o1Kyq-CD',
|
||||
@@ -80,6 +83,48 @@ class BanByeIE(BanByeBaseIE):
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
}, {
|
||||
# ['src']['hls']['levels'] variant m3u8 urls only; master m3u8 is 404
|
||||
'url': 'https://banbye.com/watch/v_a_gPFuC9LoW5',
|
||||
'info_dict': {
|
||||
'id': 'v_a_gPFuC9LoW5',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:183524056bebdfa245fd6d214f63c0fe',
|
||||
'description': 'md5:943ac87287ca98d28d8b8797719827c6',
|
||||
'uploader': 'wRealu24',
|
||||
'channel_id': 'ch_wrealu24',
|
||||
'channel_url': 'https://banbye.com/channel/ch_wrealu24',
|
||||
'upload_date': '20231113',
|
||||
'timestamp': 1699874062,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'thumbnail': 'https://cdn.banbye.com/video/v_a_gPFuC9LoW5/96.webp',
|
||||
'tags': ['jaszczur', 'sejm', 'lewica', 'polska', 'ukrainizacja', 'pierwszeposiedzeniesejmu'],
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8'],
|
||||
}, {
|
||||
# ['src']['hls']['masterPlaylist'] m3u8 only
|
||||
'url': 'https://banbye.com/watch/v_B0rsKWsr-aaa',
|
||||
'info_dict': {
|
||||
'id': 'v_B0rsKWsr-aaa',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:00b254164b82101b3f9e5326037447ed',
|
||||
'description': 'md5:3fd8b48aa81954ba024bc60f5de6e167',
|
||||
'uploader': 'PSTV Piotr Szlachtowicz ',
|
||||
'channel_id': 'ch_KV9EVObkB9wB',
|
||||
'channel_url': 'https://banbye.com/channel/ch_KV9EVObkB9wB',
|
||||
'upload_date': '20240629',
|
||||
'timestamp': 1719646816,
|
||||
'duration': 2377,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'thumbnail': 'https://cdn.banbye.com/video/v_B0rsKWsr-aaa/96.webp',
|
||||
'tags': ['Biden', 'Trump', 'Wybory', 'USA'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -94,11 +139,24 @@ class BanByeIE(BanByeBaseIE):
|
||||
'id': f'{quality}p',
|
||||
'url': f'{self._CDN_BASE}/video/{video_id}/{quality}.webp',
|
||||
} for quality in [48, 96, 144, 240, 512, 1080]]
|
||||
formats = [{
|
||||
'format_id': f'http-{quality}p',
|
||||
'quality': quality,
|
||||
'url': f'{self._CDN_BASE}/video/{video_id}/{quality}.mp4',
|
||||
} for quality in data['quality']]
|
||||
|
||||
formats = []
|
||||
url_data = self._download_json(f'{self._API_BASE}/videos/{video_id}/url', video_id, data=b'')
|
||||
if master_url := traverse_obj(url_data, ('src', 'hls', 'masterPlaylist', {url_or_none})):
|
||||
formats = self._extract_m3u8_formats(master_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
|
||||
for format_id, format_url in traverse_obj(url_data, (
|
||||
'src', ('mp4', 'hls'), 'levels', {dict.items}, lambda _, v: url_or_none(v[1]))):
|
||||
ext = determine_ext(format_url)
|
||||
is_hls = ext == 'm3u8'
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'ext': 'mp4' if is_hls else ext,
|
||||
'format_id': join_nonempty(is_hls and 'hls', format_id),
|
||||
'protocol': 'm3u8_native' if is_hls else 'https',
|
||||
'height': int_or_none(format_id),
|
||||
})
|
||||
self._remove_duplicate_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -3,7 +3,6 @@ import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
KNOWN_EXTENSIONS,
|
||||
ExtractorError,
|
||||
@@ -42,7 +41,7 @@ class BandcampIE(InfoExtractor):
|
||||
'uploader_id': 'youtube-dl',
|
||||
'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg',
|
||||
},
|
||||
'_skip': 'There is a limit of 200 free downloads / month for the test song'
|
||||
'skip': 'There is a limit of 200 free downloads / month for the test song',
|
||||
}, {
|
||||
# free download
|
||||
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
|
||||
@@ -119,7 +118,7 @@ class BandcampIE(InfoExtractor):
|
||||
|
||||
def _extract_data_attr(self, webpage, video_id, attr='tralbum', fatal=True):
|
||||
return self._parse_json(self._html_search_regex(
|
||||
r'data-%s=(["\'])({.+?})\1' % attr, webpage,
|
||||
rf'data-{attr}=(["\'])({{.+?}})\1', webpage,
|
||||
attr + ' data', group=2), video_id, fatal=fatal)
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -167,7 +166,7 @@ class BandcampIE(InfoExtractor):
|
||||
|
||||
download_link = tralbum.get('freeDownloadPage')
|
||||
if download_link:
|
||||
track_id = compat_str(tralbum['id'])
|
||||
track_id = str(tralbum['id'])
|
||||
|
||||
download_webpage = self._download_webpage(
|
||||
download_link, track_id, 'Downloading free downloads page')
|
||||
@@ -192,7 +191,7 @@ class BandcampIE(InfoExtractor):
|
||||
if isinstance(download_formats_list, list):
|
||||
for f in blob['download_formats']:
|
||||
name, ext = f.get('name'), f.get('file_extension')
|
||||
if all(isinstance(x, compat_str) for x in (name, ext)):
|
||||
if all(isinstance(x, str) for x in (name, ext)):
|
||||
download_formats[name] = ext.strip('.')
|
||||
|
||||
for format_id, f in downloads.items():
|
||||
@@ -207,7 +206,7 @@ class BandcampIE(InfoExtractor):
|
||||
})
|
||||
format_id = f.get('encoding_name') or format_id
|
||||
stat = self._download_json(
|
||||
stat_url, track_id, 'Downloading %s JSON' % format_id,
|
||||
stat_url, track_id, f'Downloading {format_id} JSON',
|
||||
transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1],
|
||||
fatal=False)
|
||||
if not stat:
|
||||
@@ -225,7 +224,7 @@ class BandcampIE(InfoExtractor):
|
||||
'acodec': format_id.split('-')[0],
|
||||
})
|
||||
|
||||
title = '%s - %s' % (artist, track) if artist else track
|
||||
title = f'{artist} - {track}' if artist else track
|
||||
|
||||
if not duration:
|
||||
duration = float_or_none(self._html_search_meta(
|
||||
@@ -267,7 +266,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
'timestamp': 1311756226,
|
||||
'upload_date': '20110727',
|
||||
'uploader': 'Blazo',
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
'md5': '1a2c32e2691474643e912cc6cd4bffaa',
|
||||
@@ -278,7 +277,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
'timestamp': 1311757238,
|
||||
'upload_date': '20110727',
|
||||
'uploader': 'Blazo',
|
||||
}
|
||||
},
|
||||
},
|
||||
],
|
||||
'info_dict': {
|
||||
@@ -287,9 +286,9 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
'uploader_id': 'blazo',
|
||||
},
|
||||
'params': {
|
||||
'playlistend': 2
|
||||
'playlistend': 2,
|
||||
},
|
||||
'skip': 'Bandcamp imposes download limits.'
|
||||
'skip': 'Bandcamp imposes download limits.',
|
||||
}, {
|
||||
'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave',
|
||||
'info_dict': {
|
||||
@@ -324,7 +323,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
def suitable(cls, url):
|
||||
return (False
|
||||
if BandcampWeeklyIE.suitable(url) or BandcampIE.suitable(url)
|
||||
else super(BandcampAlbumIE, cls).suitable(url))
|
||||
else super().suitable(url))
|
||||
|
||||
def _real_extract(self, url):
|
||||
uploader_id, album_id = self._match_valid_url(url).groups()
|
||||
@@ -376,7 +375,7 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bandcamp.com/?blah/blah@&show=228',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -407,7 +406,7 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
title = show.get('audio_title') or 'Bandcamp Weekly'
|
||||
subtitle = show.get('subtitle')
|
||||
if subtitle:
|
||||
title += ' - %s' % subtitle
|
||||
title += f' - {subtitle}'
|
||||
|
||||
return {
|
||||
'id': show_id,
|
||||
@@ -419,7 +418,7 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
'series': 'Bandcamp Weekly',
|
||||
'episode': show.get('subtitle'),
|
||||
'episode_id': show_id,
|
||||
'formats': formats
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
@@ -440,7 +439,7 @@ class BandcampUserIE(InfoExtractor):
|
||||
'url': 'http://dotscale.bandcamp.com',
|
||||
'info_dict': {
|
||||
'id': 'dotscale',
|
||||
'title': 'Discography of dotscale'
|
||||
'title': 'Discography of dotscale',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
}, {
|
||||
|
||||
@@ -23,7 +23,7 @@ class BannedVideoIE(InfoExtractor):
|
||||
'description': 'md5:560d96f02abbebe6c6b78b47465f6b28',
|
||||
'upload_date': '20200324',
|
||||
'timestamp': 1585087895,
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
_GRAPHQL_GETMETADATA_QUERY = '''
|
||||
@@ -84,15 +84,15 @@ query GetCommentReplies($id: String!) {
|
||||
'GetCommentReplies': _GRAPHQL_GETCOMMENTSREPLIES_QUERY,
|
||||
}
|
||||
|
||||
def _call_api(self, video_id, id, operation, note):
|
||||
def _call_api(self, video_id, id_var, operation, note):
|
||||
return self._download_json(
|
||||
'https://api.infowarsmedia.com/graphql', video_id, note=note,
|
||||
headers={
|
||||
'Content-Type': 'application/json; charset=utf-8'
|
||||
'Content-Type': 'application/json; charset=utf-8',
|
||||
}, data=json.dumps({
|
||||
'variables': {'id': id},
|
||||
'variables': {'id': id_var},
|
||||
'operationName': operation,
|
||||
'query': self._GRAPHQL_QUERIES[operation]
|
||||
'query': self._GRAPHQL_QUERIES[operation],
|
||||
}).encode('utf8')).get('data')
|
||||
|
||||
def _get_comments(self, video_id, comments, comment_data):
|
||||
@@ -151,5 +151,5 @@ query GetCommentReplies($id: String!) {
|
||||
'tags': [tag.get('name') for tag in video_info.get('tags')],
|
||||
'availability': self._availability(is_unlisted=video_info.get('unlisted')),
|
||||
'comments': comments,
|
||||
'__post_extractor': self.extract_comments(video_id, comments, video_json.get('getVideoComments'))
|
||||
'__post_extractor': self.extract_comments(video_id, comments, video_json.get('getVideoComments')),
|
||||
}
|
||||
|
||||
@@ -2,10 +2,10 @@ import functools
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
import urllib.parse
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str, compat_urlparse
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
@@ -35,7 +35,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
IE_NAME = 'bbc.co.uk'
|
||||
IE_DESC = 'BBC iPlayer'
|
||||
_ID_REGEX = r'(?:[pbml][\da-z]{7}|w[\da-z]{7,14})'
|
||||
_VALID_URL = r'''(?x)
|
||||
_VALID_URL = rf'''(?x)
|
||||
https?://
|
||||
(?:www\.)?bbc\.co\.uk/
|
||||
(?:
|
||||
@@ -45,8 +45,8 @@ class BBCCoUkIE(InfoExtractor):
|
||||
radio/player/|
|
||||
events/[^/]+/play/[^/]+/
|
||||
)
|
||||
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
|
||||
''' % _ID_REGEX
|
||||
(?P<id>{_ID_REGEX})(?!/(?:episodes|broadcasts|clips))
|
||||
'''
|
||||
_EMBED_REGEX = [r'setPlaylist\("(?P<url>https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)']
|
||||
|
||||
_LOGIN_URL = 'https://account.bbc.com/signin'
|
||||
@@ -75,7 +75,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
|
||||
@@ -148,7 +148,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
|
||||
'note': 'Video',
|
||||
@@ -162,7 +162,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls',
|
||||
'info_dict': {
|
||||
@@ -268,19 +268,19 @@ class BBCCoUkIE(InfoExtractor):
|
||||
error = clean_html(get_element_by_class('form-message', response))
|
||||
if error:
|
||||
raise ExtractorError(
|
||||
'Unable to login: %s' % error, expected=True)
|
||||
f'Unable to login: {error}', expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
class MediaSelectionError(Exception):
|
||||
def __init__(self, id):
|
||||
self.id = id
|
||||
def __init__(self, error_id):
|
||||
self.id = error_id
|
||||
|
||||
def _extract_asx_playlist(self, connection, programme_id):
|
||||
asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
|
||||
return [ref.get('href') for ref in asx.findall('./Entry/ref')]
|
||||
|
||||
def _extract_items(self, playlist):
|
||||
return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
|
||||
return playlist.findall(f'./{{{self._EMP_PLAYLIST_NS}}}item')
|
||||
|
||||
def _extract_medias(self, media_selection):
|
||||
error = media_selection.get('result')
|
||||
@@ -312,7 +312,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
|
||||
def _raise_extractor_error(self, media_selection_error):
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, media_selection_error.id),
|
||||
f'{self.IE_NAME} returned error: {media_selection_error.id}',
|
||||
expected=True)
|
||||
|
||||
def _download_media_selector(self, programme_id):
|
||||
@@ -372,7 +372,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
|
||||
formats.append({
|
||||
'url': ref,
|
||||
'format_id': 'ref%s_%s' % (i, format_id),
|
||||
'format_id': f'ref{i}_{format_id}',
|
||||
})
|
||||
elif transfer_format == 'dash':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
@@ -394,7 +394,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
href, programme_id, f4m_id=format_id, fatal=False))
|
||||
else:
|
||||
if not supplier and bitrate:
|
||||
format_id += '-%d' % bitrate
|
||||
format_id += f'-{bitrate}'
|
||||
fmt = {
|
||||
'format_id': format_id,
|
||||
'filesize': file_size,
|
||||
@@ -423,9 +423,9 @@ class BBCCoUkIE(InfoExtractor):
|
||||
identifier = connection.get('identifier')
|
||||
server = connection.get('server')
|
||||
fmt.update({
|
||||
'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
|
||||
'url': f'{protocol}://{server}/{application}?{auth_string}',
|
||||
'play_path': identifier,
|
||||
'app': '%s?%s' % (application, auth_string),
|
||||
'app': f'{application}?{auth_string}',
|
||||
'page_url': 'http://www.bbc.co.uk',
|
||||
'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
|
||||
'rtmp_live': False,
|
||||
@@ -441,7 +441,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
def _download_playlist(self, playlist_id):
|
||||
try:
|
||||
playlist = self._download_json(
|
||||
'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id,
|
||||
f'http://www.bbc.co.uk/programmes/{playlist_id}/playlist.json',
|
||||
playlist_id, 'Downloading playlist JSON')
|
||||
formats = []
|
||||
subtitles = {}
|
||||
@@ -480,32 +480,32 @@ class BBCCoUkIE(InfoExtractor):
|
||||
|
||||
def _process_legacy_playlist(self, playlist_id):
|
||||
return self._process_legacy_playlist_url(
|
||||
'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id, playlist_id)
|
||||
f'http://www.bbc.co.uk/iplayer/playlist/{playlist_id}', playlist_id)
|
||||
|
||||
def _download_legacy_playlist_url(self, url, playlist_id=None):
|
||||
return self._download_xml(
|
||||
url, playlist_id, 'Downloading legacy playlist XML')
|
||||
|
||||
def _extract_from_legacy_playlist(self, playlist, playlist_id):
|
||||
no_items = playlist.find('./{%s}noItems' % self._EMP_PLAYLIST_NS)
|
||||
no_items = playlist.find(f'./{{{self._EMP_PLAYLIST_NS}}}noItems')
|
||||
if no_items is not None:
|
||||
reason = no_items.get('reason')
|
||||
if reason == 'preAvailability':
|
||||
msg = 'Episode %s is not yet available' % playlist_id
|
||||
msg = f'Episode {playlist_id} is not yet available'
|
||||
elif reason == 'postAvailability':
|
||||
msg = 'Episode %s is no longer available' % playlist_id
|
||||
msg = f'Episode {playlist_id} is no longer available'
|
||||
elif reason == 'noMedia':
|
||||
msg = 'Episode %s is not currently available' % playlist_id
|
||||
msg = f'Episode {playlist_id} is not currently available'
|
||||
else:
|
||||
msg = 'Episode %s is not available: %s' % (playlist_id, reason)
|
||||
msg = f'Episode {playlist_id} is not available: {reason}'
|
||||
raise ExtractorError(msg, expected=True)
|
||||
|
||||
for item in self._extract_items(playlist):
|
||||
kind = item.get('kind')
|
||||
if kind not in ('programme', 'radioProgramme'):
|
||||
continue
|
||||
title = playlist.find('./{%s}title' % self._EMP_PLAYLIST_NS).text
|
||||
description_el = playlist.find('./{%s}summary' % self._EMP_PLAYLIST_NS)
|
||||
title = playlist.find(f'./{{{self._EMP_PLAYLIST_NS}}}title').text
|
||||
description_el = playlist.find(f'./{{{self._EMP_PLAYLIST_NS}}}summary')
|
||||
description = description_el.text if description_el is not None else None
|
||||
|
||||
def get_programme_id(item):
|
||||
@@ -515,7 +515,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
if value and re.match(r'^[pb][\da-z]{7}$', value):
|
||||
return value
|
||||
get_from_attributes(item)
|
||||
mediator = item.find('./{%s}mediator' % self._EMP_PLAYLIST_NS)
|
||||
mediator = item.find(f'./{{{self._EMP_PLAYLIST_NS}}}mediator')
|
||||
if mediator is not None:
|
||||
return get_from_attributes(mediator)
|
||||
|
||||
@@ -555,7 +555,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
|
||||
if not programme_id:
|
||||
programme_id = self._search_regex(
|
||||
r'"vpid"\s*:\s*"(%s)"' % self._ID_REGEX, webpage, 'vpid', fatal=False, default=None)
|
||||
rf'"vpid"\s*:\s*"({self._ID_REGEX})"', webpage, 'vpid', fatal=False, default=None)
|
||||
|
||||
if programme_id:
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
@@ -641,7 +641,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# article with single video embedded with data-playable containing XML playlist
|
||||
# with direct video links as progressiveDownloadUrl (for now these are extracted)
|
||||
@@ -884,7 +884,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
'uploader_id': 'bbc_world_service',
|
||||
'series': 'CrowdScience',
|
||||
'chapters': [],
|
||||
}
|
||||
},
|
||||
}, { # onion routes
|
||||
'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576',
|
||||
'only_matching': True,
|
||||
@@ -897,7 +897,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
def suitable(cls, url):
|
||||
EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerEpisodesIE, BBCCoUkIPlayerGroupIE, BBCCoUkPlaylistIE)
|
||||
return (False if any(ie.suitable(url) for ie in EXCLUDE_IE)
|
||||
else super(BBCIE, cls).suitable(url))
|
||||
else super().suitable(url))
|
||||
|
||||
def _extract_from_media_meta(self, media_meta, video_id):
|
||||
# Direct links to media in media metadata (e.g.
|
||||
@@ -1009,7 +1009,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
if playlist:
|
||||
entry = None
|
||||
for key in ('streaming', 'progressiveDownload'):
|
||||
playlist_url = playlist.get('%sUrl' % key)
|
||||
playlist_url = playlist.get(f'{key}Url')
|
||||
if not playlist_url:
|
||||
continue
|
||||
try:
|
||||
@@ -1035,7 +1035,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
|
||||
# http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227
|
||||
group_id = self._search_regex(
|
||||
r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
|
||||
rf'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\']({self._ID_REGEX})',
|
||||
webpage, 'group id', default=None)
|
||||
if group_id:
|
||||
return self.url_result(
|
||||
@@ -1043,9 +1043,9 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
|
||||
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
|
||||
programme_id = self._search_regex(
|
||||
[r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX,
|
||||
r'<param[^>]+name="externalIdentifier"[^>]+value="(%s)"' % self._ID_REGEX,
|
||||
r'videoId\s*:\s*["\'](%s)["\']' % self._ID_REGEX],
|
||||
[rf'data-(?:video-player|media)-vpid="({self._ID_REGEX})"',
|
||||
rf'<param[^>]+name="externalIdentifier"[^>]+value="({self._ID_REGEX})"',
|
||||
rf'videoId\s*:\s*["\']({self._ID_REGEX})["\']'],
|
||||
webpage, 'vpid', default=None)
|
||||
|
||||
if programme_id:
|
||||
@@ -1142,7 +1142,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
video_id, url_transparent=True)
|
||||
entry.update({
|
||||
'timestamp': traverse_obj(morph_payload, (
|
||||
'body', 'content', 'article', 'dateTimeInfo', 'dateTime', {parse_iso8601})
|
||||
'body', 'content', 'article', 'dateTimeInfo', 'dateTime', {parse_iso8601}),
|
||||
),
|
||||
**traverse_obj(video_data, {
|
||||
'thumbnail': (('iChefImage', 'image'), {url_or_none}, any),
|
||||
@@ -1189,7 +1189,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
|
||||
'start_time': ('offset', 'start', {float_or_none}),
|
||||
'end_time': ('offset', 'end', {float_or_none}),
|
||||
})
|
||||
}),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -1287,7 +1287,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
'description': ('synopses', ('long', 'medium', 'short'), {str}, {lambda x: x or None}, any),
|
||||
'duration': ('versions', 0, 'duration', {int}),
|
||||
'timestamp': ('versions', 0, 'availableFrom', {functools.partial(int_or_none, scale=1000)}),
|
||||
})
|
||||
}),
|
||||
}
|
||||
|
||||
def is_type(*types):
|
||||
@@ -1331,7 +1331,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
if blocks:
|
||||
summary = []
|
||||
for block in blocks:
|
||||
text = try_get(block, lambda x: x['model']['text'], compat_str)
|
||||
text = try_get(block, lambda x: x['model']['text'], str)
|
||||
if text:
|
||||
summary.append(text)
|
||||
if summary:
|
||||
@@ -1411,9 +1411,9 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
def extract_all(pattern):
|
||||
return list(filter(None, map(
|
||||
lambda s: self._parse_json(s, playlist_id, fatal=False),
|
||||
re.findall(pattern, webpage))))
|
||||
return list(filter(None, (
|
||||
self._parse_json(s, playlist_id, fatal=False)
|
||||
for s in re.findall(pattern, webpage))))
|
||||
|
||||
# US accessed article with single embedded video (e.g.
|
||||
# https://www.bbc.com/news/uk-68546268)
|
||||
@@ -1435,14 +1435,14 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
|
||||
# Multiple video article (e.g.
|
||||
# http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
|
||||
EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?' % self._ID_REGEX
|
||||
EMBED_URL = rf'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+{self._ID_REGEX}(?:\b[^"]+)?'
|
||||
entries = []
|
||||
for match in extract_all(r'new\s+SMP\(({.+?})\)'):
|
||||
embed_url = match.get('playerSettings', {}).get('externalEmbedUrl')
|
||||
if embed_url and re.match(EMBED_URL, embed_url):
|
||||
entries.append(embed_url)
|
||||
entries.extend(re.findall(
|
||||
r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage))
|
||||
rf'setPlaylist\("({EMBED_URL})"\)', webpage))
|
||||
if entries:
|
||||
return self.playlist_result(
|
||||
[self.url_result(entry_, 'BBCCoUk') for entry_ in entries],
|
||||
@@ -1492,11 +1492,11 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
|
||||
video_id = media_meta.get('externalId')
|
||||
if not video_id:
|
||||
video_id = playlist_id if len(medias) == 1 else '%s-%s' % (playlist_id, num)
|
||||
video_id = playlist_id if len(medias) == 1 else f'{playlist_id}-{num}'
|
||||
|
||||
title = media_meta.get('caption')
|
||||
if not title:
|
||||
title = playlist_title if len(medias) == 1 else '%s - Video %s' % (playlist_title, num)
|
||||
title = playlist_title if len(medias) == 1 else f'{playlist_title} - Video {num}'
|
||||
|
||||
duration = int_or_none(media_meta.get('durationInSeconds')) or parse_duration(media_meta.get('duration'))
|
||||
|
||||
@@ -1557,8 +1557,8 @@ class BBCCoUkArticleIE(InfoExtractor):
|
||||
|
||||
class BBCCoUkPlaylistBaseIE(InfoExtractor):
|
||||
def _entries(self, webpage, url, playlist_id):
|
||||
single_page = 'page' in compat_urlparse.parse_qs(
|
||||
compat_urlparse.urlparse(url).query)
|
||||
single_page = 'page' in urllib.parse.parse_qs(
|
||||
urllib.parse.urlparse(url).query)
|
||||
for page_num in itertools.count(2):
|
||||
for video_id in re.findall(
|
||||
self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage):
|
||||
@@ -1572,8 +1572,8 @@ class BBCCoUkPlaylistBaseIE(InfoExtractor):
|
||||
if not next_page:
|
||||
break
|
||||
webpage = self._download_webpage(
|
||||
compat_urlparse.urljoin(url, next_page), playlist_id,
|
||||
'Downloading page %d' % page_num, page_num)
|
||||
urllib.parse.urljoin(url, next_page), playlist_id,
|
||||
f'Downloading page {page_num}', page_num)
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
@@ -1588,7 +1588,7 @@ class BBCCoUkPlaylistBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class BBCCoUkIPlayerPlaylistBaseIE(InfoExtractor):
|
||||
_VALID_URL_TMPL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/%%s/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
|
||||
_VALID_URL_TMPL = rf'https?://(?:www\.)?bbc\.co\.uk/iplayer/%s/(?P<id>{BBCCoUkIE._ID_REGEX})'
|
||||
|
||||
@staticmethod
|
||||
def _get_default(episode, key, default_key='default'):
|
||||
@@ -1712,11 +1712,11 @@ class BBCCoUkIPlayerEpisodesIE(BBCCoUkIPlayerPlaylistBaseIE):
|
||||
variables['sliceId'] = series_id
|
||||
return self._download_json(
|
||||
'https://graph.ibl.api.bbc.co.uk/', pid, headers={
|
||||
'Content-Type': 'application/json'
|
||||
'Content-Type': 'application/json',
|
||||
}, data=json.dumps({
|
||||
'id': '5692d93d5aac8d796a0305e895e61551',
|
||||
'variables': variables,
|
||||
}).encode('utf-8'))['data']['programme']
|
||||
}).encode())['data']['programme']
|
||||
|
||||
@staticmethod
|
||||
def _get_playlist_data(data):
|
||||
@@ -1776,7 +1776,7 @@ class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE):
|
||||
|
||||
def _call_api(self, pid, per_page, page=1, series_id=None):
|
||||
return self._download_json(
|
||||
'http://ibl.api.bbc.co.uk/ibl/v1/groups/%s/episodes' % pid,
|
||||
f'http://ibl.api.bbc.co.uk/ibl/v1/groups/{pid}/episodes',
|
||||
pid, query={
|
||||
'page': page,
|
||||
'per_page': per_page,
|
||||
@@ -1792,7 +1792,7 @@ class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE):
|
||||
|
||||
class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
|
||||
IE_NAME = 'bbc.co.uk:playlist'
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>%s)/(?:episodes|broadcasts|clips)' % BBCCoUkIE._ID_REGEX
|
||||
_VALID_URL = rf'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>{BBCCoUkIE._ID_REGEX})/(?:episodes|broadcasts|clips)'
|
||||
_URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s'
|
||||
_VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)'
|
||||
_TESTS = [{
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
@@ -33,7 +32,7 @@ class BeatportIE(InfoExtractor):
|
||||
'display_id': 'birds-original-mix',
|
||||
'ext': 'mp4',
|
||||
'title': "Tos, Middle Milk, Mumblin' Johnsson - Birds (Original Mix)",
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -51,7 +50,7 @@ class BeatportIE(InfoExtractor):
|
||||
|
||||
track = next(t for t in playables['tracks'] if t['id'] == int(track_id))
|
||||
|
||||
title = ', '.join((a['name'] for a in track['artists'])) + ' - ' + track['name']
|
||||
title = ', '.join(a['name'] for a in track['artists']) + ' - ' + track['name']
|
||||
if track['mix']:
|
||||
title += ' (' + track['mix'] + ')'
|
||||
|
||||
@@ -89,7 +88,7 @@ class BeatportIE(InfoExtractor):
|
||||
images.append(image)
|
||||
|
||||
return {
|
||||
'id': compat_str(track.get('id')) or track_id,
|
||||
'id': str(track.get('id')) or track_id,
|
||||
'display_id': track.get('slug') or display_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
|
||||
@@ -23,7 +23,7 @@ class BeegIE(InfoExtractor):
|
||||
'upload_date': '20220131',
|
||||
'timestamp': 1643656455,
|
||||
'display_id': '2540839',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://beeg.com/-0599050563103750?t=4-861',
|
||||
'md5': 'bd8b5ea75134f7f07fad63008db2060e',
|
||||
@@ -38,7 +38,7 @@ class BeegIE(InfoExtractor):
|
||||
'timestamp': 1643623200,
|
||||
'display_id': '2569965',
|
||||
'upload_date': '20220131',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# api/v6 v2
|
||||
'url': 'https://beeg.com/1941093077?t=911-1391',
|
||||
@@ -55,8 +55,8 @@ class BeegIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video = self._download_json(
|
||||
'https://store.externulls.com/facts/file/%s' % video_id,
|
||||
video_id, 'Downloading JSON for %s' % video_id)
|
||||
f'https://store.externulls.com/facts/file/{video_id}',
|
||||
video_id, f'Downloading JSON for {video_id}')
|
||||
|
||||
fc_facts = video.get('fc_facts')
|
||||
first_fact = {}
|
||||
|
||||
@@ -16,7 +16,7 @@ class BehindKinkIE(InfoExtractor):
|
||||
'upload_date': '20141205',
|
||||
'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/12/blaze-1.jpg',
|
||||
'age_limit': 18,
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -86,6 +86,6 @@ class BellMediaIE(InfoExtractor):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': '9c9media:%s_web:%s' % (self._DOMAINS.get(domain, domain), video_id),
|
||||
'url': f'9c9media:{self._DOMAINS.get(domain, domain)}_web:{video_id}',
|
||||
'ie_key': 'NineCNineMedia',
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ class BerufeTVIE(InfoExtractor):
|
||||
'tags': ['Studienfilm'],
|
||||
'duration': 602.440,
|
||||
'thumbnail': r're:^https://asset-out-cdn\.video-cdn\.net/private/videos/DvKC3DUpMKvUZ_6fEnfg3u/thumbnails/793063\?quality=thumbnail&__token__=[^\s]+$',
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -54,7 +54,7 @@ class BerufeTVIE(InfoExtractor):
|
||||
subtitles.setdefault(track['language'], []).append({
|
||||
'url': track['source'],
|
||||
'name': track.get('label'),
|
||||
'ext': 'vtt'
|
||||
'ext': 'vtt',
|
||||
})
|
||||
|
||||
return {
|
||||
|
||||
@@ -19,7 +19,7 @@ class BetIE(MTVServicesInfoExtractor):
|
||||
'thumbnail': r're:(?i)^https?://.*\.jpg$',
|
||||
'subtitles': {
|
||||
'en': 'mincount:2',
|
||||
}
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
@@ -39,16 +39,16 @@ class BetIE(MTVServicesInfoExtractor):
|
||||
'thumbnail': r're:(?i)^https?://.*\.jpg$',
|
||||
'subtitles': {
|
||||
'en': 'mincount:2',
|
||||
}
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
_FEED_URL = "http://feeds.mtvnservices.com/od/feed/bet-mrss-player"
|
||||
_FEED_URL = 'http://feeds.mtvnservices.com/od/feed/bet-mrss-player'
|
||||
|
||||
def _get_feed_query(self, uri):
|
||||
return {
|
||||
|
||||
@@ -98,8 +98,8 @@ class BFMTVArticleIE(BFMTVBaseIE):
|
||||
'timestamp': 1673341692,
|
||||
'duration': 109.269,
|
||||
'tags': ['rmc', 'show', 'apolline de malherbe', 'info', 'talk', 'matinale', 'radio'],
|
||||
'thumbnail': 'https://cf-images.eu-west-1.prod.boltdns.net/v1/static/876630703001/5bef74b8-9d5e-4480-a21f-60c2e2480c46/96c88b74-f9db-45e1-8040-e199c5da216c/1920x1080/match/image.jpg'
|
||||
}
|
||||
'thumbnail': 'https://cf-images.eu-west-1.prod.boltdns.net/v1/static/876630703001/5bef74b8-9d5e-4480-a21f-60c2e2480c46/96c88b74-f9db-45e1-8040-e199c5da216c/1920x1080/match/image.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -1,10 +1,8 @@
|
||||
import base64
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
|
||||
|
||||
class BigflixIE(InfoExtractor):
|
||||
@@ -21,7 +19,7 @@ class BigflixIE(InfoExtractor):
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# multiple formats
|
||||
'url': 'http://www.bigflix.com/Malayalam-movies/Drama-movies/Indian-Rupee/15967',
|
||||
@@ -38,7 +36,7 @@ class BigflixIE(InfoExtractor):
|
||||
webpage, 'title')
|
||||
|
||||
def decode_url(quoted_b64_url):
|
||||
return compat_b64decode(compat_urllib_parse_unquote(
|
||||
return base64.b64decode(urllib.parse.unquote(
|
||||
quoted_b64_url)).decode('utf-8')
|
||||
|
||||
formats = []
|
||||
@@ -47,7 +45,7 @@ class BigflixIE(InfoExtractor):
|
||||
video_url = decode_url(encoded_url)
|
||||
f = {
|
||||
'url': video_url,
|
||||
'format_id': '%sp' % height,
|
||||
'format_id': f'{height}p',
|
||||
'height': int(height),
|
||||
}
|
||||
if video_url.startswith('rtmp'):
|
||||
@@ -69,5 +67,5 @@ class BigflixIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'formats': formats
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -36,7 +36,7 @@ class BigoIE(InfoExtractor):
|
||||
raise ExtractorError('Received invalid JSON data')
|
||||
if info_raw.get('code'):
|
||||
raise ExtractorError(
|
||||
'Bigo says: %s (code %s)' % (info_raw.get('msg'), info_raw.get('code')), expected=True)
|
||||
'Bigo says: {} (code {})'.format(info_raw.get('msg'), info_raw.get('code')), expected=True)
|
||||
info = info_raw.get('data') or {}
|
||||
|
||||
if not info.get('alive'):
|
||||
|
||||
@@ -20,7 +20,7 @@ class BildIE(InfoExtractor):
|
||||
'description': 'md5:a4058c4fa2a804ab59c00d7244bbf62f',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 196,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'note': 'static MP4 and HLS',
|
||||
'url': 'https://www.bild.de/video/clip/news-ausland/deftiger-abgang-vom-10m-turm-bademeister-sorgt-fuer-skandal-85158620.bild.html',
|
||||
@@ -32,7 +32,7 @@ class BildIE(InfoExtractor):
|
||||
'description': 'md5:709b543c24dc31bbbffee73bccda34ad',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 69,
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -31,12 +31,12 @@ from ..utils import (
|
||||
mimetype2ext,
|
||||
parse_count,
|
||||
parse_qs,
|
||||
parse_resolution,
|
||||
qualities,
|
||||
smuggle_url,
|
||||
srt_subtitles_timecode,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
unified_timestamp,
|
||||
unsmuggle_url,
|
||||
url_or_none,
|
||||
@@ -47,6 +47,23 @@ from ..utils import (
|
||||
|
||||
class BilibiliBaseIE(InfoExtractor):
|
||||
_FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
|
||||
_WBI_KEY_CACHE_TIMEOUT = 30 # exact expire timeout is unclear, use 30s for one session
|
||||
_wbi_key_cache = {}
|
||||
|
||||
@property
|
||||
def is_logged_in(self):
|
||||
return bool(self._get_cookies('https://api.bilibili.com').get('SESSDATA'))
|
||||
|
||||
def _check_missing_formats(self, play_info, formats):
|
||||
parsed_qualities = set(traverse_obj(formats, (..., 'quality')))
|
||||
missing_formats = join_nonempty(*[
|
||||
traverse_obj(fmt, 'new_description', 'display_desc', 'quality')
|
||||
for fmt in traverse_obj(play_info, (
|
||||
'support_formats', lambda _, v: v['quality'] not in parsed_qualities))], delim=', ')
|
||||
if missing_formats:
|
||||
self.to_screen(
|
||||
f'Format(s) {missing_formats} are missing; you have to login or '
|
||||
f'become a premium member to download them. {self._login_hint()}')
|
||||
|
||||
def extract_formats(self, play_info):
|
||||
format_names = {
|
||||
@@ -86,18 +103,75 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
'format': format_names.get(video.get('id')),
|
||||
} for video in traverse_obj(play_info, ('dash', 'video', ...)))
|
||||
|
||||
missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality')))
|
||||
if missing_formats:
|
||||
self.to_screen(f'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; '
|
||||
f'you have to login or become premium member to download them. {self._login_hint()}')
|
||||
if formats:
|
||||
self._check_missing_formats(play_info, formats)
|
||||
|
||||
fragments = traverse_obj(play_info, ('durl', lambda _, v: url_or_none(v['url']), {
|
||||
'url': ('url', {url_or_none}),
|
||||
'duration': ('length', {functools.partial(float_or_none, scale=1000)}),
|
||||
'filesize': ('size', {int_or_none}),
|
||||
}))
|
||||
if fragments:
|
||||
formats.append({
|
||||
'url': fragments[0]['url'],
|
||||
'filesize': sum(traverse_obj(fragments, (..., 'filesize'))),
|
||||
**({
|
||||
'fragments': fragments,
|
||||
'protocol': 'http_dash_segments',
|
||||
} if len(fragments) > 1 else {}),
|
||||
**traverse_obj(play_info, {
|
||||
'quality': ('quality', {int_or_none}),
|
||||
'format_id': ('quality', {str_or_none}),
|
||||
'format_note': ('quality', {lambda x: format_names.get(x)}),
|
||||
'duration': ('timelength', {functools.partial(float_or_none, scale=1000)}),
|
||||
}),
|
||||
**parse_resolution(format_names.get(play_info.get('quality'))),
|
||||
})
|
||||
return formats
|
||||
|
||||
def _download_playinfo(self, video_id, cid, headers=None):
|
||||
def _get_wbi_key(self, video_id):
|
||||
if time.time() < self._wbi_key_cache.get('ts', 0) + self._WBI_KEY_CACHE_TIMEOUT:
|
||||
return self._wbi_key_cache['key']
|
||||
|
||||
session_data = self._download_json(
|
||||
'https://api.bilibili.com/x/web-interface/nav', video_id, note='Downloading wbi sign')
|
||||
|
||||
lookup = ''.join(traverse_obj(session_data, (
|
||||
'data', 'wbi_img', ('img_url', 'sub_url'),
|
||||
{lambda x: x.rpartition('/')[2].partition('.')[0]})))
|
||||
|
||||
# from getMixinKey() in the vendor js
|
||||
mixin_key_enc_tab = [
|
||||
46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49,
|
||||
33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40,
|
||||
61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11,
|
||||
36, 20, 34, 44, 52,
|
||||
]
|
||||
|
||||
self._wbi_key_cache.update({
|
||||
'key': ''.join(lookup[i] for i in mixin_key_enc_tab)[:32],
|
||||
'ts': time.time(),
|
||||
})
|
||||
return self._wbi_key_cache['key']
|
||||
|
||||
def _sign_wbi(self, params, video_id):
|
||||
params['wts'] = round(time.time())
|
||||
params = {
|
||||
k: ''.join(filter(lambda char: char not in "!'()*", str(v)))
|
||||
for k, v in sorted(params.items())
|
||||
}
|
||||
query = urllib.parse.urlencode(params)
|
||||
params['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest()
|
||||
return params
|
||||
|
||||
def _download_playinfo(self, bvid, cid, headers=None, qn=None):
|
||||
params = {'bvid': bvid, 'cid': cid, 'fnval': 4048}
|
||||
if qn:
|
||||
params['qn'] = qn
|
||||
return self._download_json(
|
||||
'https://api.bilibili.com/x/player/playurl', video_id,
|
||||
query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
|
||||
note=f'Downloading video formats for cid {cid}', headers=headers)['data']
|
||||
'https://api.bilibili.com/x/player/wbi/playurl', bvid,
|
||||
query=self._sign_wbi(params, bvid), headers=headers,
|
||||
note=f'Downloading video formats for cid {cid} {qn or ""}')['data']
|
||||
|
||||
def json2srt(self, json_data):
|
||||
srt_data = ''
|
||||
@@ -112,21 +186,21 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
'danmaku': [{
|
||||
'ext': 'xml',
|
||||
'url': f'https://comment.bilibili.com/{cid}.xml',
|
||||
}]
|
||||
}],
|
||||
}
|
||||
|
||||
subtitle_info = traverse_obj(self._download_json(
|
||||
video_info = self._download_json(
|
||||
'https://api.bilibili.com/x/player/v2', video_id,
|
||||
query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
|
||||
note=f'Extracting subtitle info {cid}'), ('data', 'subtitle'))
|
||||
subs_list = traverse_obj(subtitle_info, ('subtitles', lambda _, v: v['subtitle_url'] and v['lan']))
|
||||
if not subs_list and traverse_obj(subtitle_info, 'allow_submit'):
|
||||
if not self._get_cookies('https://api.bilibili.com').get('SESSDATA'): # no login session cookie
|
||||
self.report_warning(f'CC subtitles (if any) are only visible when logged in. {self._login_hint()}', only_once=True)
|
||||
for s in subs_list:
|
||||
note=f'Extracting subtitle info {cid}')
|
||||
if traverse_obj(video_info, ('data', 'need_login_subtitle')):
|
||||
self.report_warning(
|
||||
f'Subtitles are only available when logged in. {self._login_hint()}', only_once=True)
|
||||
for s in traverse_obj(video_info, (
|
||||
'data', 'subtitle', 'subtitles', lambda _, v: v['subtitle_url'] and v['lan'])):
|
||||
subtitles.setdefault(s['lan'], []).append({
|
||||
'ext': 'srt',
|
||||
'data': self.json2srt(self._download_json(s['subtitle_url'], video_id))
|
||||
'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)),
|
||||
})
|
||||
return subtitles
|
||||
|
||||
@@ -203,19 +277,19 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges)
|
||||
return cid_edges
|
||||
|
||||
def _get_interactive_entries(self, video_id, cid, metainfo):
|
||||
def _get_interactive_entries(self, video_id, cid, metainfo, headers=None):
|
||||
graph_version = traverse_obj(
|
||||
self._download_json(
|
||||
'https://api.bilibili.com/x/player/wbi/v2', video_id,
|
||||
'Extracting graph version', query={'bvid': video_id, 'cid': cid}),
|
||||
'Extracting graph version', query={'bvid': video_id, 'cid': cid}, headers=headers),
|
||||
('data', 'interaction', 'graph_version', {int_or_none}))
|
||||
cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
|
||||
for cid, edges in cid_edges.items():
|
||||
play_info = self._download_playinfo(video_id, cid)
|
||||
play_info = self._download_playinfo(video_id, cid, headers=headers)
|
||||
yield {
|
||||
**metainfo,
|
||||
'id': f'{video_id}_{cid}',
|
||||
'title': f'{metainfo.get("title")} - {list(edges.values())[0].get("title")}',
|
||||
'title': f'{metainfo.get("title")} - {next(iter(edges.values())).get("title")}',
|
||||
'formats': self.extract_formats(play_info),
|
||||
'description': f'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}',
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
@@ -243,17 +317,17 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'timestamp': 1488353834,
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'_old_archive_ids': ['bilibili 8903802_part1'],
|
||||
},
|
||||
}, {
|
||||
'note': 'old av URL version',
|
||||
'url': 'http://www.bilibili.com/video/av1074402/',
|
||||
'info_dict': {
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
|
||||
'id': 'BV11x411K7CN',
|
||||
'ext': 'mp4',
|
||||
'title': '【金坷垃】金泡沫',
|
||||
'uploader': '菊子桑',
|
||||
'uploader_id': '156160',
|
||||
'id': 'BV11x411K7CN',
|
||||
'title': '【金坷垃】金泡沫',
|
||||
'duration': 308.36,
|
||||
'upload_date': '20140420',
|
||||
'timestamp': 1397983878,
|
||||
@@ -262,6 +336,8 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'tags': list,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
|
||||
'_old_archive_ids': ['bilibili 1074402_part1'],
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
}, {
|
||||
@@ -269,7 +345,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'url': 'https://www.bilibili.com/video/BV1bK411W797',
|
||||
'info_dict': {
|
||||
'id': 'BV1bK411W797',
|
||||
'title': '物语中的人物是如何吐槽自己的OP的'
|
||||
'title': '物语中的人物是如何吐槽自己的OP的',
|
||||
},
|
||||
'playlist_count': 18,
|
||||
'playlist': [{
|
||||
@@ -288,8 +364,9 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'view_count': int,
|
||||
'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
|
||||
'duration': 90.314,
|
||||
}
|
||||
}]
|
||||
'_old_archive_ids': ['bilibili 498159642_part1'],
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
'note': 'Specific page of Anthology',
|
||||
'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
|
||||
@@ -308,28 +385,8 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'view_count': int,
|
||||
'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
|
||||
'duration': 90.314,
|
||||
}
|
||||
}, {
|
||||
'note': 'video has subtitles',
|
||||
'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
|
||||
'info_dict': {
|
||||
'id': 'BV12N4y1M7rh',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
|
||||
'tags': list,
|
||||
'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
|
||||
'duration': 313.557,
|
||||
'upload_date': '20220709',
|
||||
'uploader': '小夫太渴',
|
||||
'timestamp': 1657347907,
|
||||
'uploader_id': '1326814124',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'subtitles': 'count:2'
|
||||
'_old_archive_ids': ['bilibili 498159642_part1'],
|
||||
},
|
||||
'params': {'listsubtitles': True},
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/video/av8903802/',
|
||||
'info_dict': {
|
||||
@@ -347,6 +404,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'_old_archive_ids': ['bilibili 8903802_part1'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -370,6 +428,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'_old_archive_ids': ['bilibili 463665680_part1'],
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
}, {
|
||||
@@ -388,8 +447,8 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'_old_archive_ids': ['bilibili 893839363_part1'],
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
}, {
|
||||
'note': 'newer festival video',
|
||||
'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
|
||||
@@ -406,8 +465,57 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'_old_archive_ids': ['bilibili 778246196_part1'],
|
||||
},
|
||||
}, {
|
||||
'note': 'legacy flv/mp4 video',
|
||||
'url': 'https://www.bilibili.com/video/BV1ms411Q7vw/?p=4',
|
||||
'info_dict': {
|
||||
'id': 'BV1ms411Q7vw_p4',
|
||||
'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
|
||||
'timestamp': 1458222815,
|
||||
'upload_date': '20160317',
|
||||
'description': '云南方言快乐生产线出品',
|
||||
'duration': float,
|
||||
'uploader': '一笑颠天',
|
||||
'uploader_id': '3916081',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'tags': list,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'_old_archive_ids': ['bilibili 4120229_part4'],
|
||||
},
|
||||
'params': {'extractor_args': {'bilibili': {'prefer_multi_flv': ['32']}}},
|
||||
'playlist_count': 19,
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'BV1ms411Q7vw_p4_0',
|
||||
'ext': 'flv',
|
||||
'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
|
||||
'duration': 399.102,
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
'note': 'legacy mp4-only video',
|
||||
'url': 'https://www.bilibili.com/video/BV1nx411u79K',
|
||||
'info_dict': {
|
||||
'id': 'BV1nx411u79K',
|
||||
'ext': 'mp4',
|
||||
'title': '【练习室】201603声乐练习《No Air》with VigoVan',
|
||||
'timestamp': 1508893551,
|
||||
'upload_date': '20171025',
|
||||
'description': '@ZERO-G伯远\n声乐练习 《No Air》with Vigo Van',
|
||||
'duration': 80.384,
|
||||
'uploader': '伯远',
|
||||
'uploader_id': '10584494',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'tags': list,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'_old_archive_ids': ['bilibili 15700301_part1'],
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
}, {
|
||||
'note': 'interactive/split-path video',
|
||||
'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
|
||||
@@ -425,6 +533,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'_old_archive_ids': ['bilibili 292734508_part1'],
|
||||
},
|
||||
'playlist_count': 33,
|
||||
'playlist': [{
|
||||
@@ -443,6 +552,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'_old_archive_ids': ['bilibili 292734508_part1'],
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
@@ -465,6 +575,29 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'upload_date': '20191021',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
},
|
||||
}, {
|
||||
'note': 'video has subtitles, which requires login',
|
||||
'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
|
||||
'info_dict': {
|
||||
'id': 'BV12N4y1M7rh',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
|
||||
'tags': list,
|
||||
'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
|
||||
'duration': 313.557,
|
||||
'upload_date': '20220709',
|
||||
'uploader': '小夫太渴',
|
||||
'timestamp': 1657347907,
|
||||
'uploader_id': '1326814124',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'subtitles': 'count:2', # login required for CC subtitle
|
||||
'_old_archive_ids': ['bilibili 898179753_part1'],
|
||||
},
|
||||
'params': {'listsubtitles': True},
|
||||
'skip': 'login required for subtitle',
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
|
||||
'info_dict': {
|
||||
@@ -498,8 +631,9 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
if not self._match_valid_url(urlh.url):
|
||||
return self.url_result(urlh.url)
|
||||
|
||||
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
|
||||
headers['Referer'] = url
|
||||
|
||||
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
|
||||
is_festival = 'videoData' not in initial_state
|
||||
if is_festival:
|
||||
video_data = initial_state['videoInfo']
|
||||
@@ -548,7 +682,6 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
|
||||
aid = video_data.get('aid')
|
||||
old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')
|
||||
|
||||
cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')
|
||||
|
||||
festival_info = {}
|
||||
@@ -586,19 +719,65 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
|
||||
if is_interactive:
|
||||
return self.playlist_result(
|
||||
self._get_interactive_entries(video_id, cid, metainfo), **metainfo, **{
|
||||
'duration': traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
|
||||
'__post_extractor': self.extract_comments(aid),
|
||||
})
|
||||
self._get_interactive_entries(video_id, cid, metainfo, headers=headers), **metainfo,
|
||||
duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
|
||||
__post_extractor=self.extract_comments(aid))
|
||||
else:
|
||||
return {
|
||||
**metainfo,
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
'chapters': self._get_chapters(aid, cid),
|
||||
'subtitles': self.extract_subtitles(video_id, cid),
|
||||
'formats': self.extract_formats(play_info),
|
||||
'__post_extractor': self.extract_comments(aid),
|
||||
}
|
||||
formats = self.extract_formats(play_info)
|
||||
|
||||
if not traverse_obj(play_info, ('dash')):
|
||||
# we only have legacy formats and need additional work
|
||||
has_qn = lambda x: x in traverse_obj(formats, (..., 'quality'))
|
||||
for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})):
|
||||
formats.extend(traverse_obj(
|
||||
self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, qn=qn)),
|
||||
lambda _, v: not has_qn(v['quality'])))
|
||||
self._check_missing_formats(play_info, formats)
|
||||
flv_formats = traverse_obj(formats, lambda _, v: v['fragments'])
|
||||
if flv_formats and len(flv_formats) < len(formats):
|
||||
# Flv and mp4 are incompatible due to `multi_video` workaround, so drop one
|
||||
if not self._configuration_arg('prefer_multi_flv'):
|
||||
dropped_fmts = ', '.join(
|
||||
f'{f.get("format_note")} ({f.get("format_id")})' for f in flv_formats)
|
||||
formats = traverse_obj(formats, lambda _, v: not v.get('fragments'))
|
||||
if dropped_fmts:
|
||||
self.to_screen(
|
||||
f'Dropping incompatible flv format(s) {dropped_fmts} since mp4 is available. '
|
||||
'To extract flv, pass --extractor-args "bilibili:prefer_multi_flv"')
|
||||
else:
|
||||
formats = traverse_obj(
|
||||
# XXX: Filtering by extractor-arg is for testing purposes
|
||||
formats, lambda _, v: v['quality'] == int(self._configuration_arg('prefer_multi_flv')[0]),
|
||||
) or [max(flv_formats, key=lambda x: x['quality'])]
|
||||
|
||||
if traverse_obj(formats, (0, 'fragments')):
|
||||
# We have flv formats, which are individual short videos with their own timestamps and metainfo
|
||||
# Binary concatenation corrupts their timestamps, so we need a `multi_video` workaround
|
||||
return {
|
||||
**metainfo,
|
||||
'_type': 'multi_video',
|
||||
'entries': [{
|
||||
'id': f'{metainfo["id"]}_{idx}',
|
||||
'title': metainfo['title'],
|
||||
'http_headers': metainfo['http_headers'],
|
||||
'formats': [{
|
||||
**fragment,
|
||||
'format_id': formats[0].get('format_id'),
|
||||
}],
|
||||
'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None,
|
||||
'__post_extractor': self.extract_comments(aid) if idx == 0 else None,
|
||||
} for idx, fragment in enumerate(formats[0]['fragments'])],
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
}
|
||||
else:
|
||||
return {
|
||||
**metainfo,
|
||||
'formats': formats,
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
'chapters': self._get_chapters(aid, cid),
|
||||
'subtitles': self.extract_subtitles(video_id, cid),
|
||||
'__post_extractor': self.extract_comments(aid),
|
||||
}
|
||||
|
||||
|
||||
class BiliBiliBangumiIE(BilibiliBaseIE):
|
||||
@@ -640,7 +819,7 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
|
||||
'duration': 1425.256,
|
||||
'timestamp': 1554566400,
|
||||
'upload_date': '20190406',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
},
|
||||
'skip': 'Geo-restricted',
|
||||
}, {
|
||||
@@ -661,7 +840,7 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
|
||||
'duration': 1922.129,
|
||||
'timestamp': 1602853860,
|
||||
'upload_date': '20201016',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
},
|
||||
}]
|
||||
|
||||
@@ -764,7 +943,7 @@ class BiliBiliBangumiMediaIE(BilibiliBaseIE):
|
||||
'duration': 1525.777,
|
||||
'timestamp': 1425074413,
|
||||
'upload_date': '20150227',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
},
|
||||
}],
|
||||
}]
|
||||
@@ -794,7 +973,7 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
|
||||
'title': '鬼灭之刃',
|
||||
'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b',
|
||||
},
|
||||
'playlist_mincount': 26
|
||||
'playlist_mincount': 26,
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/bangumi/play/ss2251',
|
||||
'info_dict': {
|
||||
@@ -819,7 +998,7 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
|
||||
'duration': 1436.992,
|
||||
'timestamp': 1343185080,
|
||||
'upload_date': '20120725',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
},
|
||||
}],
|
||||
}]
|
||||
@@ -906,7 +1085,7 @@ class BilibiliCheeseIE(BilibiliCheeseBaseIE):
|
||||
'upload_date': '20230924',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'view_count': int,
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -939,7 +1118,7 @@ class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
|
||||
'upload_date': '20230924',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'view_count': int,
|
||||
}
|
||||
},
|
||||
}],
|
||||
'params': {'playlist_items': '1'},
|
||||
}, {
|
||||
@@ -969,7 +1148,7 @@ class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
|
||||
}))
|
||||
|
||||
|
||||
class BilibiliSpaceBaseIE(InfoExtractor):
|
||||
class BilibiliSpaceBaseIE(BilibiliBaseIE):
|
||||
def _extract_playlist(self, fetch_page, get_metadata, get_entries):
|
||||
first_page = fetch_page(0)
|
||||
metadata = get_metadata(first_page)
|
||||
@@ -989,73 +1168,53 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
|
||||
'id': '3985676',
|
||||
},
|
||||
'playlist_mincount': 178,
|
||||
'skip': 'login required',
|
||||
}, {
|
||||
'url': 'https://space.bilibili.com/313580179/video',
|
||||
'info_dict': {
|
||||
'id': '313580179',
|
||||
},
|
||||
'playlist_mincount': 92,
|
||||
'skip': 'login required',
|
||||
}]
|
||||
|
||||
def _extract_signature(self, playlist_id):
|
||||
session_data = self._download_json('https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False)
|
||||
|
||||
key_from_url = lambda x: x[x.rfind('/') + 1:].split('.')[0]
|
||||
img_key = traverse_obj(
|
||||
session_data, ('data', 'wbi_img', 'img_url', {key_from_url})) or '34478ba821254d9d93542680e3b86100'
|
||||
sub_key = traverse_obj(
|
||||
session_data, ('data', 'wbi_img', 'sub_url', {key_from_url})) or '7e16a90d190a4355a78fd00b32a38de6'
|
||||
|
||||
session_key = img_key + sub_key
|
||||
|
||||
signature_values = []
|
||||
for position in (
|
||||
46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
|
||||
12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
|
||||
57, 62, 11, 36, 20, 34, 44, 52
|
||||
):
|
||||
char_at_position = try_call(lambda: session_key[position])
|
||||
if char_at_position:
|
||||
signature_values.append(char_at_position)
|
||||
|
||||
return ''.join(signature_values)[:32]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
|
||||
if not is_video_url:
|
||||
self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
|
||||
'To download audios, add a "/audio" to the URL')
|
||||
|
||||
signature = self._extract_signature(playlist_id)
|
||||
|
||||
def fetch_page(page_idx):
|
||||
query = {
|
||||
'keyword': '',
|
||||
'mid': playlist_id,
|
||||
'order': 'pubdate',
|
||||
'order': traverse_obj(parse_qs(url), ('order', 0)) or 'pubdate',
|
||||
'order_avoided': 'true',
|
||||
'platform': 'web',
|
||||
'pn': page_idx + 1,
|
||||
'ps': 30,
|
||||
'tid': 0,
|
||||
'web_location': 1550101,
|
||||
'wts': int(time.time()),
|
||||
}
|
||||
query['w_rid'] = hashlib.md5(f'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()
|
||||
|
||||
try:
|
||||
response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
|
||||
playlist_id, note=f'Downloading page {page_idx}', query=query,
|
||||
headers={'referer': url})
|
||||
response = self._download_json(
|
||||
'https://api.bilibili.com/x/space/wbi/arc/search', playlist_id,
|
||||
query=self._sign_wbi(query, playlist_id),
|
||||
note=f'Downloading space page {page_idx}', headers={'Referer': url})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 412:
|
||||
raise ExtractorError(
|
||||
'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
|
||||
raise
|
||||
if response['code'] in (-352, -401):
|
||||
status_code = response['code']
|
||||
if status_code == -401:
|
||||
raise ExtractorError(
|
||||
f'Request is blocked by server ({-response["code"]}), '
|
||||
'please add cookies, wait and try later.', expected=True)
|
||||
'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
|
||||
elif status_code == -352 and not self.is_logged_in:
|
||||
self.raise_login_required('Request is rejected, you need to login to access playlist')
|
||||
elif status_code != 0:
|
||||
raise ExtractorError(f'Request failed ({status_code}): {response.get("message") or "Unknown error"}')
|
||||
return response['data']
|
||||
|
||||
def get_metadata(page_data):
|
||||
@@ -1163,7 +1322,7 @@ class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
|
||||
'uploader_id': ('meta', 'mid', {str_or_none}),
|
||||
'timestamp': ('meta', 'ptime', {int_or_none}),
|
||||
'thumbnail': ('meta', 'cover', {url_or_none}),
|
||||
})
|
||||
}),
|
||||
}
|
||||
|
||||
def get_entries(page_data):
|
||||
@@ -1195,7 +1354,7 @@ class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
|
||||
mid, sid = self._match_valid_url(url).group('mid', 'sid')
|
||||
playlist_id = f'{mid}_{sid}'
|
||||
playlist_meta = traverse_obj(self._download_json(
|
||||
f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False
|
||||
f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False,
|
||||
), {
|
||||
'title': ('data', 'meta', 'name', {str}),
|
||||
'description': ('data', 'meta', 'description', {str}),
|
||||
@@ -1217,7 +1376,7 @@ class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
|
||||
'page_count': math.ceil(entry_count / page_size),
|
||||
'page_size': page_size,
|
||||
'uploader': self._get_uploader(mid, playlist_id),
|
||||
**playlist_meta
|
||||
**playlist_meta,
|
||||
}
|
||||
|
||||
def get_entries(page_data):
|
||||
@@ -1241,7 +1400,7 @@ class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
|
||||
'upload_date': '20201109',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
|
||||
'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
@@ -1281,7 +1440,10 @@ class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/watchlater/#/list',
|
||||
'info_dict': {'id': 'watchlater'},
|
||||
'info_dict': {
|
||||
'id': r're:\d+',
|
||||
'title': '稍后再看',
|
||||
},
|
||||
'playlist_mincount': 0,
|
||||
'skip': 'login required',
|
||||
}]
|
||||
@@ -1345,7 +1507,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
|
||||
'uploader_id': '84912',
|
||||
'timestamp': 1604905176,
|
||||
'upload_date': '20201109',
|
||||
'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
|
||||
'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
|
||||
},
|
||||
'playlist_mincount': 22,
|
||||
}, {
|
||||
@@ -1357,21 +1519,26 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
|
||||
'skip': 'redirect url',
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/list/watchlater',
|
||||
'info_dict': {'id': 'watchlater'},
|
||||
'info_dict': {
|
||||
'id': r're:2_\d+',
|
||||
'title': '稍后再看',
|
||||
'uploader': str,
|
||||
'uploader_id': str,
|
||||
},
|
||||
'playlist_mincount': 0,
|
||||
'skip': 'login required',
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/medialist/play/watchlater',
|
||||
'info_dict': {'id': 'watchlater'},
|
||||
'playlist_mincount': 0,
|
||||
'skip': 'login required',
|
||||
'skip': 'redirect url & login required',
|
||||
}]
|
||||
|
||||
def _extract_medialist(self, query, list_id):
|
||||
for page_num in itertools.count(1):
|
||||
page_data = self._download_json(
|
||||
'https://api.bilibili.com/x/v2/medialist/resource/list',
|
||||
list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}'
|
||||
list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}',
|
||||
)['data']
|
||||
yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
|
||||
query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
|
||||
@@ -1407,7 +1574,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
|
||||
'tid': ('tid', {int_or_none}),
|
||||
'sort_field': ('sortFiled', {int_or_none}),
|
||||
'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
|
||||
})
|
||||
}),
|
||||
}
|
||||
metadata = {
|
||||
'id': f'{query["type"]}_{query["biz_id"]}',
|
||||
@@ -1415,7 +1582,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
|
||||
'title': ('title', {str}),
|
||||
'uploader': ('upper', 'name', {str}),
|
||||
'uploader_id': ('upper', 'mid', {str_or_none}),
|
||||
'timestamp': ('ctime', {int_or_none}),
|
||||
'timestamp': ('ctime', {int_or_none}, {lambda x: x or None}),
|
||||
'thumbnail': ('cover', {url_or_none}),
|
||||
})),
|
||||
}
|
||||
@@ -1430,26 +1597,26 @@ class BilibiliCategoryIE(InfoExtractor):
|
||||
'url': 'https://www.bilibili.com/v/kichiku/mad',
|
||||
'info_dict': {
|
||||
'id': 'kichiku: mad',
|
||||
'title': 'kichiku: mad'
|
||||
'title': 'kichiku: mad',
|
||||
},
|
||||
'playlist_mincount': 45,
|
||||
'params': {
|
||||
'playlistend': 45
|
||||
}
|
||||
'playlistend': 45,
|
||||
},
|
||||
}]
|
||||
|
||||
def _fetch_page(self, api_url, num_pages, query, page_num):
|
||||
parsed_json = self._download_json(
|
||||
api_url, query, query={'Search_key': query, 'pn': page_num},
|
||||
note='Extracting results from page %s of %s' % (page_num, num_pages))
|
||||
note=f'Extracting results from page {page_num} of {num_pages}')
|
||||
|
||||
video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
|
||||
if not video_list:
|
||||
raise ExtractorError('Failed to retrieve video list for page %d' % page_num)
|
||||
raise ExtractorError(f'Failed to retrieve video list for page {page_num}')
|
||||
|
||||
for video in video_list:
|
||||
yield self.url_result(
|
||||
'https://www.bilibili.com/video/%s' % video['bvid'], 'BiliBili', video['bvid'])
|
||||
'https://www.bilibili.com/video/{}'.format(video['bvid']), 'BiliBili', video['bvid'])
|
||||
|
||||
def _entries(self, category, subcategory, query):
|
||||
# map of categories : subcategories : RIDs
|
||||
@@ -1459,7 +1626,7 @@ class BilibiliCategoryIE(InfoExtractor):
|
||||
'manual_vocaloid': 126,
|
||||
'guide': 22,
|
||||
'theatre': 216,
|
||||
'course': 127
|
||||
'course': 127,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -1485,7 +1652,7 @@ class BilibiliCategoryIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
|
||||
query = '%s: %s' % (category, subcategory)
|
||||
query = f'{category}: {subcategory}'
|
||||
|
||||
return self.playlist_result(self._entries(category, subcategory, query), query, query)
|
||||
|
||||
@@ -1588,7 +1755,7 @@ class BilibiliAudioIE(BilibiliAudioBaseIE):
|
||||
formats = [{
|
||||
'url': play_data['cdns'][0],
|
||||
'filesize': int_or_none(play_data.get('size')),
|
||||
'vcodec': 'none'
|
||||
'vcodec': 'none',
|
||||
}]
|
||||
|
||||
for a_format in formats:
|
||||
@@ -1606,7 +1773,7 @@ class BilibiliAudioIE(BilibiliAudioBaseIE):
|
||||
subtitles = {
|
||||
'origin': [{
|
||||
'url': lyric,
|
||||
}]
|
||||
}],
|
||||
}
|
||||
|
||||
return {
|
||||
@@ -1674,7 +1841,7 @@ class BiliBiliPlayerIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
'http://www.bilibili.tv/video/av%s/' % video_id,
|
||||
f'http://www.bilibili.tv/video/av{video_id}/',
|
||||
ie=BiliBiliIE.ie_key(), video_id=video_id)
|
||||
|
||||
|
||||
@@ -1702,11 +1869,10 @@ class BiliIntlBaseIE(InfoExtractor):
|
||||
return json.get('data')
|
||||
|
||||
def json2srt(self, json):
|
||||
data = '\n\n'.join(
|
||||
return '\n\n'.join(
|
||||
f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
|
||||
for i, line in enumerate(traverse_obj(json, (
|
||||
'body', lambda _, l: l['content'] and l['from'] and l['to']))))
|
||||
return data
|
||||
|
||||
def _get_subtitles(self, *, ep_id=None, aid=None):
|
||||
sub_json = self._call_api(
|
||||
@@ -1808,14 +1974,15 @@ class BiliIntlBaseIE(InfoExtractor):
|
||||
note='Downloading login key', errnote='Unable to download login key')['data']
|
||||
|
||||
public_key = Cryptodome.RSA.importKey(key_data['key'])
|
||||
password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8'))
|
||||
password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode())
|
||||
login_post = self._download_json(
|
||||
'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({
|
||||
'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None,
|
||||
data=urlencode_postdata({
|
||||
'username': username,
|
||||
'password': base64.b64encode(password_hash).decode('ascii'),
|
||||
'keep_me': 'true',
|
||||
's_locale': 'en_US',
|
||||
'isTrusted': 'true'
|
||||
'isTrusted': 'true',
|
||||
}), note='Logging in', errnote='Unable to log in')
|
||||
if login_post.get('code'):
|
||||
if login_post.get('message'):
|
||||
@@ -1842,17 +2009,17 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||
'chapters': [{
|
||||
'start_time': 0,
|
||||
'end_time': 76.242,
|
||||
'title': '<Untitled Chapter 1>'
|
||||
'title': '<Untitled Chapter 1>',
|
||||
}, {
|
||||
'start_time': 76.242,
|
||||
'end_time': 161.161,
|
||||
'title': 'Intro'
|
||||
'title': 'Intro',
|
||||
}, {
|
||||
'start_time': 1325.742,
|
||||
'end_time': 1403.903,
|
||||
'title': 'Outro'
|
||||
'title': 'Outro',
|
||||
}],
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# Non-Bstation page
|
||||
'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
|
||||
@@ -1869,17 +2036,17 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||
'chapters': [{
|
||||
'start_time': 0,
|
||||
'end_time': 88.0,
|
||||
'title': '<Untitled Chapter 1>'
|
||||
'title': '<Untitled Chapter 1>',
|
||||
}, {
|
||||
'start_time': 88.0,
|
||||
'end_time': 156.0,
|
||||
'title': 'Intro'
|
||||
'title': 'Intro',
|
||||
}, {
|
||||
'start_time': 1173.0,
|
||||
'end_time': 1259.535,
|
||||
'title': 'Outro'
|
||||
'title': 'Outro',
|
||||
}],
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# Subtitle with empty content
|
||||
'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
|
||||
@@ -1890,7 +2057,7 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||
'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
|
||||
'episode_number': 140,
|
||||
},
|
||||
'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
|
||||
'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.',
|
||||
}, {
|
||||
# episode comment extraction
|
||||
'url': 'https://www.bilibili.tv/en/play/34580/340317',
|
||||
@@ -1908,20 +2075,20 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||
'chapters': [{
|
||||
'start_time': 0,
|
||||
'end_time': 61.0,
|
||||
'title': '<Untitled Chapter 1>'
|
||||
'title': '<Untitled Chapter 1>',
|
||||
}, {
|
||||
'start_time': 61.0,
|
||||
'end_time': 134.0,
|
||||
'title': 'Intro'
|
||||
'title': 'Intro',
|
||||
}, {
|
||||
'start_time': 1290.0,
|
||||
'end_time': 1379.0,
|
||||
'title': 'Outro'
|
||||
'title': 'Outro',
|
||||
}],
|
||||
},
|
||||
'params': {
|
||||
'getcomments': True
|
||||
}
|
||||
'getcomments': True,
|
||||
},
|
||||
}, {
|
||||
# user generated content comment extraction
|
||||
'url': 'https://www.bilibili.tv/en/video/2045730385',
|
||||
@@ -1936,8 +2103,8 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||
'thumbnail': r're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg',
|
||||
},
|
||||
'params': {
|
||||
'getcomments': True
|
||||
}
|
||||
'getcomments': True,
|
||||
},
|
||||
}, {
|
||||
# episode id without intro and outro
|
||||
'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
|
||||
@@ -1992,7 +2159,7 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||
# Non-Bstation layout, read through episode list
|
||||
season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
|
||||
video_data = traverse_obj(season_json, (
|
||||
'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id
|
||||
'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id,
|
||||
), expected_type=dict, get_all=False)
|
||||
|
||||
# XXX: webpage metadata may not accurate, it just used to not crash when video_data not found
|
||||
@@ -2024,7 +2191,7 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||
'id': replies.get('rpid'),
|
||||
'like_count': int_or_none(replies.get('like_count')),
|
||||
'parent': replies.get('parent'),
|
||||
'timestamp': unified_timestamp(replies.get('ctime_text'))
|
||||
'timestamp': unified_timestamp(replies.get('ctime_text')),
|
||||
}
|
||||
|
||||
if not traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
|
||||
@@ -2077,11 +2244,11 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||
chapters = [{
|
||||
'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_start_time')), 1000),
|
||||
'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_end_time')), 1000),
|
||||
'title': 'Intro'
|
||||
'title': 'Intro',
|
||||
}, {
|
||||
'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_start_time')), 1000),
|
||||
'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_end_time')), 1000),
|
||||
'title': 'Outro'
|
||||
'title': 'Outro',
|
||||
}]
|
||||
|
||||
return {
|
||||
@@ -2137,12 +2304,13 @@ class BiliIntlSeriesIE(BiliIntlBaseIE):
|
||||
episode_id = str(episode['episode_id'])
|
||||
yield self.url_result(smuggle_url(
|
||||
BiliIntlIE._make_url(episode_id, series_id),
|
||||
self._parse_video_metadata(episode)
|
||||
self._parse_video_metadata(episode),
|
||||
), BiliIntlIE, episode_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
series_id = self._match_id(url)
|
||||
series_info = self._call_api(f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
|
||||
series_info = self._call_api(
|
||||
f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
|
||||
return self.playlist_result(
|
||||
self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
|
||||
categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),
|
||||
@@ -2156,19 +2324,19 @@ class BiliLiveIE(InfoExtractor):
|
||||
'url': 'https://live.bilibili.com/196',
|
||||
'info_dict': {
|
||||
'id': '33989',
|
||||
'description': "周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)",
|
||||
'description': '周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)',
|
||||
'ext': 'flv',
|
||||
'title': "太空狼人杀联动,不被爆杀就算赢",
|
||||
'thumbnail': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg",
|
||||
'title': '太空狼人杀联动,不被爆杀就算赢',
|
||||
'thumbnail': 'https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg',
|
||||
'timestamp': 1650802769,
|
||||
},
|
||||
'skip': 'not live'
|
||||
'skip': 'not live',
|
||||
}, {
|
||||
'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://live.bilibili.com/blanc/196',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_FORMATS = {
|
||||
@@ -2209,7 +2377,7 @@ class BiliLiveIE(InfoExtractor):
|
||||
raise ExtractorError('Streamer is not live', expected=True)
|
||||
|
||||
formats = []
|
||||
for qn in self._FORMATS.keys():
|
||||
for qn in self._FORMATS:
|
||||
stream_data = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, {
|
||||
'room_id': room_id,
|
||||
'qn': qn,
|
||||
|
||||
@@ -24,7 +24,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class BitChuteIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
|
||||
_EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{_VALID_URL})']
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/',
|
||||
@@ -39,7 +39,7 @@ class BitChuteIE(InfoExtractor):
|
||||
'upload_date': '20170103',
|
||||
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
|
||||
'channel': 'BitChute',
|
||||
'channel_url': 'https://www.bitchute.com/channel/bitchute/'
|
||||
'channel_url': 'https://www.bitchute.com/channel/bitchute/',
|
||||
},
|
||||
}, {
|
||||
# test case: video with different channel and uploader
|
||||
@@ -55,7 +55,7 @@ class BitChuteIE(InfoExtractor):
|
||||
'upload_date': '20231106',
|
||||
'uploader_url': 'https://www.bitchute.com/profile/9K0kUWA9zmd9/',
|
||||
'channel': 'Full Measure with Sharyl Attkisson',
|
||||
'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/'
|
||||
'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/',
|
||||
},
|
||||
}, {
|
||||
# video not downloadable in browser, but we can recover it
|
||||
@@ -72,7 +72,7 @@ class BitChuteIE(InfoExtractor):
|
||||
'upload_date': '20181113',
|
||||
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
|
||||
'channel': 'BitChute',
|
||||
'channel_url': 'https://www.bitchute.com/channel/bitchute/'
|
||||
'channel_url': 'https://www.bitchute.com/channel/bitchute/',
|
||||
},
|
||||
'params': {'check_formats': None},
|
||||
}, {
|
||||
@@ -91,6 +91,9 @@ class BitChuteIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://old.bitchute.com/video/UGlrF9o9b-Q/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_BYPASS = False
|
||||
|
||||
@@ -115,7 +118,7 @@ class BitChuteIE(InfoExtractor):
|
||||
continue
|
||||
return {
|
||||
'url': url,
|
||||
'filesize': int_or_none(response.headers.get('Content-Length'))
|
||||
'filesize': int_or_none(response.headers.get('Content-Length')),
|
||||
}
|
||||
|
||||
def _raise_if_restricted(self, webpage):
|
||||
@@ -132,7 +135,7 @@ class BitChuteIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
f'https://www.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS)
|
||||
f'https://old.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS)
|
||||
|
||||
self._raise_if_restricted(webpage)
|
||||
publish_date = clean_html(get_element_by_class('video-publish-date', webpage))
|
||||
@@ -171,13 +174,13 @@ class BitChuteIE(InfoExtractor):
|
||||
|
||||
|
||||
class BitChuteChannelIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?P<type>channel|playlist)/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?P<type>channel|playlist)/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bitchute.com/channel/bitchute/',
|
||||
'info_dict': {
|
||||
'id': 'bitchute',
|
||||
'title': 'BitChute',
|
||||
'description': 'md5:5329fb3866125afa9446835594a9b138',
|
||||
'description': 'md5:2134c37d64fc3a4846787c402956adac',
|
||||
},
|
||||
'playlist': [
|
||||
{
|
||||
@@ -196,7 +199,7 @@ class BitChuteChannelIE(InfoExtractor):
|
||||
'duration': 16,
|
||||
'view_count': int,
|
||||
},
|
||||
}
|
||||
},
|
||||
],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -209,7 +212,10 @@ class BitChuteChannelIE(InfoExtractor):
|
||||
'id': 'wV9Imujxasw9',
|
||||
'title': 'Bruce MacDonald and "The Light of Darkness"',
|
||||
'description': 'md5:747724ef404eebdfc04277714f81863e',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://old.bitchute.com/playlist/wV9Imujxasw9/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'
|
||||
@@ -224,13 +230,13 @@ class BitChuteChannelIE(InfoExtractor):
|
||||
'container': 'playlist-video',
|
||||
'title': 'title',
|
||||
'description': 'description',
|
||||
}
|
||||
},
|
||||
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _make_url(playlist_id, playlist_type):
|
||||
return f'https://www.bitchute.com/{playlist_type}/{playlist_id}/'
|
||||
return f'https://old.bitchute.com/{playlist_type}/{playlist_id}/'
|
||||
|
||||
def _fetch_page(self, playlist_id, playlist_type, page_num):
|
||||
playlist_url = self._make_url(playlist_id, playlist_type)
|
||||
|
||||
@@ -47,7 +47,7 @@ class BlackboardCollaborateIE(InfoExtractor):
|
||||
region = mobj.group('region')
|
||||
video_id = mobj.group('id')
|
||||
info = self._download_json(
|
||||
'https://{}.bbcollab.com/collab/api/csa/recordings/{}/data'.format(region, video_id), video_id)
|
||||
f'https://{region}.bbcollab.com/collab/api/csa/recordings/{video_id}/data', video_id)
|
||||
duration = info.get('duration')
|
||||
title = info['name']
|
||||
upload_date = info.get('created')
|
||||
|
||||
@@ -44,7 +44,7 @@ class BleacherReportIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
article_id = self._match_id(url)
|
||||
|
||||
article_data = self._download_json('http://api.bleacherreport.com/api/v1/articles/%s' % article_id, article_id)['article']
|
||||
article_data = self._download_json(f'http://api.bleacherreport.com/api/v1/articles/{article_id}', article_id)['article']
|
||||
|
||||
thumbnails = []
|
||||
primary_photo = article_data.get('primaryPhoto')
|
||||
@@ -71,11 +71,11 @@ class BleacherReportIE(InfoExtractor):
|
||||
if video:
|
||||
video_type = video['type']
|
||||
if video_type in ('cms.bleacherreport.com', 'vid.bleacherreport.com'):
|
||||
info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id']
|
||||
info['url'] = 'http://bleacherreport.com/video_embed?id={}'.format(video['id'])
|
||||
elif video_type == 'youtube.com':
|
||||
info['url'] = video['id']
|
||||
elif video_type == 'vine.co':
|
||||
info['url'] = 'https://vine.co/v/%s' % video['id']
|
||||
info['url'] = 'https://vine.co/v/{}'.format(video['id'])
|
||||
else:
|
||||
info['url'] = video_type + video['id']
|
||||
return info
|
||||
@@ -99,12 +99,12 @@ class BleacherReportCMSIE(AMPIE):
|
||||
|
||||
},
|
||||
'expected_warnings': [
|
||||
'Unable to download f4m manifest'
|
||||
]
|
||||
'Unable to download f4m manifest',
|
||||
],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
info = self._extract_feed_info('http://vid.bleacherreport.com/videos/%s.akamai' % video_id)
|
||||
info = self._extract_feed_info(f'http://vid.bleacherreport.com/videos/{video_id}.akamai')
|
||||
info['id'] = video_id
|
||||
return info
|
||||
|
||||
@@ -16,7 +16,7 @@ class BlerpIE(InfoExtractor):
|
||||
'uploader_id': '5fb81e51aa66ae000c395478',
|
||||
'ext': 'mp3',
|
||||
'tags': ['samsung', 'galaxy', 's8', 'over the horizon', '2016', 'ringtone'],
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://blerp.com/soundbites/5bc94ef4796001000498429f',
|
||||
'info_dict': {
|
||||
@@ -25,11 +25,11 @@ class BlerpIE(InfoExtractor):
|
||||
'uploader': '179617322678353920',
|
||||
'uploader_id': '5ba99cf71386730004552c42',
|
||||
'ext': 'mp3',
|
||||
'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee']
|
||||
}
|
||||
'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee'],
|
||||
},
|
||||
}]
|
||||
|
||||
_GRAPHQL_OPERATIONNAME = "webBitePageGetBite"
|
||||
_GRAPHQL_OPERATIONNAME = 'webBitePageGetBite'
|
||||
_GRAPHQL_QUERY = (
|
||||
'''query webBitePageGetBite($_id: MongoID!) {
|
||||
web {
|
||||
@@ -141,27 +141,26 @@ class BlerpIE(InfoExtractor):
|
||||
'operationName': self._GRAPHQL_OPERATIONNAME,
|
||||
'query': self._GRAPHQL_QUERY,
|
||||
'variables': {
|
||||
'_id': audio_id
|
||||
}
|
||||
'_id': audio_id,
|
||||
},
|
||||
}
|
||||
|
||||
headers = {
|
||||
'Content-Type': 'application/json'
|
||||
'Content-Type': 'application/json',
|
||||
}
|
||||
|
||||
json_result = self._download_json('https://api.blerp.com/graphql',
|
||||
audio_id, data=json.dumps(data).encode('utf-8'), headers=headers)
|
||||
json_result = self._download_json(
|
||||
'https://api.blerp.com/graphql', audio_id,
|
||||
data=json.dumps(data).encode(), headers=headers)
|
||||
|
||||
bite_json = json_result['data']['web']['biteById']
|
||||
|
||||
info_dict = {
|
||||
return {
|
||||
'id': bite_json['_id'],
|
||||
'url': bite_json['audio']['mp3']['url'],
|
||||
'title': bite_json['title'],
|
||||
'uploader': traverse_obj(bite_json, ('ownerObject', 'username'), expected_type=strip_or_none),
|
||||
'uploader_id': traverse_obj(bite_json, ('ownerObject', '_id'), expected_type=strip_or_none),
|
||||
'ext': 'mp3',
|
||||
'tags': list(filter(None, map(strip_or_none, (traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None)
|
||||
'tags': list(filter(None, map(strip_or_none, (traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None),
|
||||
}
|
||||
|
||||
return info_dict
|
||||
|
||||
@@ -21,14 +21,14 @@ class BloggerIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'duration': 76.068,
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
token_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, token_id)
|
||||
data_json = self._search_regex(r'var\s+VIDEO_CONFIG\s*=\s*(\{.*)', webpage, 'JSON data')
|
||||
data = self._parse_json(data_json.encode('utf-8').decode('unicode_escape'), token_id)
|
||||
data = self._parse_json(data_json.encode().decode('unicode_escape'), token_id)
|
||||
streams = data['streams']
|
||||
formats = [{
|
||||
'ext': mimetype2ext(traverse_obj(parse_qs(stream['play_url']), ('mime', 0))),
|
||||
|
||||
@@ -55,7 +55,7 @@ class BloombergIE(InfoExtractor):
|
||||
title = re.sub(': Video$', '', self._og_search_title(webpage))
|
||||
|
||||
embed_info = self._download_json(
|
||||
'http://www.bloomberg.com/multimedia/api/embed?id=%s' % video_id, video_id)
|
||||
f'http://www.bloomberg.com/multimedia/api/embed?id={video_id}', video_id)
|
||||
formats = []
|
||||
for stream in embed_info['streams']:
|
||||
stream_url = stream.get('url')
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_parse_qs
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
@@ -9,20 +10,18 @@ class BokeCCBaseIE(InfoExtractor):
|
||||
r'<(?:script|embed)[^>]+src=(?P<q>["\'])(?:https?:)?//p\.bokecc\.com/(?:player|flash/player\.swf)\?(?P<query>.+?)(?P=q)',
|
||||
webpage, 'player params', group='query')
|
||||
|
||||
player_params = compat_parse_qs(player_params_str)
|
||||
player_params = urllib.parse.parse_qs(player_params_str)
|
||||
|
||||
info_xml = self._download_xml(
|
||||
'http://p.bokecc.com/servlet/playinfo?uid=%s&vid=%s&m=1' % (
|
||||
'http://p.bokecc.com/servlet/playinfo?uid={}&vid={}&m=1'.format(
|
||||
player_params['siteid'][0], player_params['vid'][0]), video_id)
|
||||
|
||||
formats = [{
|
||||
return [{
|
||||
'format_id': format_id,
|
||||
'url': quality.find('./copy').attrib['playurl'],
|
||||
'quality': int(quality.attrib['value']),
|
||||
} for quality in info_xml.findall('./video/quality')]
|
||||
|
||||
return formats
|
||||
|
||||
|
||||
class BokeCCIE(BokeCCBaseIE):
|
||||
_IE_DESC = 'CC视频'
|
||||
@@ -38,11 +37,11 @@ class BokeCCIE(BokeCCBaseIE):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
qs = compat_parse_qs(self._match_valid_url(url).group('query'))
|
||||
qs = urllib.parse.parse_qs(self._match_valid_url(url).group('query'))
|
||||
if not qs.get('vid') or not qs.get('uid'):
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
|
||||
video_id = '%s_%s' % (qs['uid'][0], qs['vid'][0])
|
||||
video_id = '{}_{}'.format(qs['uid'][0], qs['vid'][0])
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
try_get,
|
||||
@@ -38,7 +37,7 @@ class BongaCamsIE(InfoExtractor):
|
||||
channel_id = mobj.group('id')
|
||||
|
||||
amf = self._download_json(
|
||||
'https://%s/tools/amf.php' % host, channel_id,
|
||||
f'https://{host}/tools/amf.php', channel_id,
|
||||
data=urlencode_postdata((
|
||||
('method', 'getRoomData'),
|
||||
('args[]', channel_id),
|
||||
@@ -48,14 +47,14 @@ class BongaCamsIE(InfoExtractor):
|
||||
server_url = amf['localData']['videoServerUrl']
|
||||
|
||||
uploader_id = try_get(
|
||||
amf, lambda x: x['performerData']['username'], compat_str) or channel_id
|
||||
amf, lambda x: x['performerData']['username'], str) or channel_id
|
||||
uploader = try_get(
|
||||
amf, lambda x: x['performerData']['displayName'], compat_str)
|
||||
amf, lambda x: x['performerData']['displayName'], str)
|
||||
like_count = int_or_none(try_get(
|
||||
amf, lambda x: x['performerData']['loversCount']))
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
'%s/hls/stream_%s/playlist.m3u8' % (server_url, uploader_id),
|
||||
f'{server_url}/hls/stream_{uploader_id}/playlist.m3u8',
|
||||
channel_id, 'mp4', m3u8_id='hls', live=True)
|
||||
|
||||
return {
|
||||
|
||||
@@ -57,8 +57,7 @@ class BostonGlobeIE(InfoExtractor):
|
||||
|
||||
if video_id and account_id and player_id and embed:
|
||||
entries.append(
|
||||
'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
|
||||
% (account_id, player_id, embed, video_id))
|
||||
f'http://players.brightcove.net/{account_id}/{player_id}_{embed}/index.html?videoId={video_id}')
|
||||
|
||||
if len(entries) == 0:
|
||||
return self.url_result(url, 'Generic')
|
||||
|
||||
@@ -12,7 +12,7 @@ from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class BoxIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/?#]+)(?:/file/(?P<id>\d+))?'
|
||||
_VALID_URL = r'https?://(?:[^.]+\.)?(?P<service>app|ent)\.box\.com/s/(?P<shared_name>[^/?#]+)(?:/file/(?P<id>\d+))?'
|
||||
_TESTS = [{
|
||||
'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538',
|
||||
'md5': '1f81b2fd3960f38a40a3b8823e5fcd43',
|
||||
@@ -38,10 +38,22 @@ class BoxIE(InfoExtractor):
|
||||
'uploader_id': '239068974',
|
||||
},
|
||||
'params': {'skip_download': 'dash fragment too small'},
|
||||
}, {
|
||||
'url': 'https://thejacksonlaboratory.ent.box.com/s/2x09dm6vcg6y28o0oox1so4l0t8wzt6l/file/1536173056065',
|
||||
'info_dict': {
|
||||
'id': '1536173056065',
|
||||
'ext': 'mp4',
|
||||
'uploader_id': '18523128264',
|
||||
'uploader': 'Lexi Hennigan',
|
||||
'title': 'iPSC Symposium recording part 1.mp4',
|
||||
'timestamp': 1716228343,
|
||||
'upload_date': '20240520',
|
||||
},
|
||||
'params': {'skip_download': 'dash fragment too small'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
shared_name, file_id = self._match_valid_url(url).groups()
|
||||
shared_name, file_id, service = self._match_valid_url(url).group('shared_name', 'id', 'service')
|
||||
webpage = self._download_webpage(url, file_id or shared_name)
|
||||
|
||||
if not file_id:
|
||||
@@ -57,14 +69,14 @@ class BoxIE(InfoExtractor):
|
||||
request_token = self._search_json(
|
||||
r'Box\.config\s*=', webpage, 'Box config', file_id)['requestToken']
|
||||
access_token = self._download_json(
|
||||
'https://app.box.com/app-api/enduserapp/elements/tokens', file_id,
|
||||
f'https://{service}.box.com/app-api/enduserapp/elements/tokens', file_id,
|
||||
'Downloading token JSON metadata',
|
||||
data=json.dumps({'fileIDs': [file_id]}).encode(), headers={
|
||||
'Content-Type': 'application/json',
|
||||
'X-Request-Token': request_token,
|
||||
'X-Box-EndUser-API': 'sharedName=' + shared_name,
|
||||
})[file_id]['read']
|
||||
shared_link = 'https://app.box.com/s/' + shared_name
|
||||
shared_link = f'https://{service}.box.com/s/{shared_name}'
|
||||
f = self._download_json(
|
||||
'https://api.box.com/2.0/files/' + file_id, file_id,
|
||||
'Downloading file JSON metadata', headers={
|
||||
@@ -72,20 +84,20 @@ class BoxIE(InfoExtractor):
|
||||
'BoxApi': 'shared_link=' + shared_link,
|
||||
'X-Rep-Hints': '[dash]', # TODO: extract `hls` formats
|
||||
}, query={
|
||||
'fields': 'authenticated_download_url,created_at,created_by,description,extension,is_download_available,name,representations,size'
|
||||
'fields': 'authenticated_download_url,created_at,created_by,description,extension,is_download_available,name,representations,size',
|
||||
})
|
||||
title = f['name']
|
||||
|
||||
query = {
|
||||
'access_token': access_token,
|
||||
'shared_link': shared_link
|
||||
'shared_link': shared_link,
|
||||
}
|
||||
|
||||
formats = []
|
||||
|
||||
for url_tmpl in traverse_obj(f, (
|
||||
'representations', 'entries', lambda _, v: v['representation'] == 'dash',
|
||||
'content', 'url_template', {url_or_none}
|
||||
'content', 'url_template', {url_or_none},
|
||||
)):
|
||||
manifest_url = update_url_query(url_tmpl.replace('{+asset_path}', 'manifest.mpd'), query)
|
||||
fmts = self._extract_mpd_formats(manifest_url, file_id)
|
||||
|
||||
@@ -21,7 +21,7 @@ class BoxCastVideoIE(InfoExtractor):
|
||||
'release_date': '20221210',
|
||||
'uploader_id': 're8w0v8hohhvpqtbskpe',
|
||||
'uploader': 'Children\'s Health Defense',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://boxcast.tv/video-portal/vctwevwntun3o0ikq7af/rvyblnn0fxbfjx5nwxhl/otbpltj2kzkveo2qz3ad',
|
||||
'info_dict': {
|
||||
@@ -30,8 +30,8 @@ class BoxCastVideoIE(InfoExtractor):
|
||||
'uploader_id': 'vctwevwntun3o0ikq7af',
|
||||
'uploader': 'Legacy Christian Church',
|
||||
'title': 'The Quest | 1: Beginner\'s Bay | Jamie Schools',
|
||||
'thumbnail': r're:https?://uploads.boxcast.com/(?:[\w-]+/){3}.+\.jpg'
|
||||
}
|
||||
'thumbnail': r're:https?://uploads.boxcast.com/(?:[\w-]+/){3}.+\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://boxcast.tv/channel/z03fqwaeaby5lnaawox2?b=ssihlw5gvfij2by8tkev',
|
||||
'info_dict': {
|
||||
@@ -44,7 +44,7 @@ class BoxCastVideoIE(InfoExtractor):
|
||||
'uploader': 'Lighthouse Ministries International - Beltsville, Maryland',
|
||||
'description': 'md5:ac23e3d01b0b0be592e8f7fe0ec3a340',
|
||||
'title': 'New Year\'s Eve CROSSOVER Service at LHMI | December 31, 2022',
|
||||
}
|
||||
},
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://childrenshealthdefense.eu/live-stream/',
|
||||
@@ -57,7 +57,7 @@ class BoxCastVideoIE(InfoExtractor):
|
||||
'release_date': '20221210',
|
||||
'uploader_id': 're8w0v8hohhvpqtbskpe',
|
||||
'uploader': 'Children\'s Health Defense',
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -61,7 +61,7 @@ class BRIE(InfoExtractor):
|
||||
'title': 'Umweltbewusster Häuslebauer',
|
||||
'description': 'md5:d52dae9792d00226348c1dbb13c9bae2',
|
||||
'duration': 116,
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.br.de/fernsehen/br-alpha/sendungen/kant-fuer-anfaenger/kritik-der-reinen-vernunft/kant-kritik-01-metaphysik100.html',
|
||||
@@ -74,7 +74,7 @@ class BRIE(InfoExtractor):
|
||||
'duration': 893,
|
||||
'uploader': 'Eva Maria Steimle',
|
||||
'upload_date': '20170208',
|
||||
}
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
@@ -142,7 +142,7 @@ class BRIE(InfoExtractor):
|
||||
http_format_info = format_info.copy()
|
||||
http_format_info.update({
|
||||
'url': format_url,
|
||||
'format_id': 'http-%s' % asset_type,
|
||||
'format_id': f'http-{asset_type}',
|
||||
})
|
||||
formats.append(http_format_info)
|
||||
server_prefix = xpath_text(asset, 'serverPrefix')
|
||||
@@ -151,7 +151,7 @@ class BRIE(InfoExtractor):
|
||||
rtmp_format_info.update({
|
||||
'url': server_prefix,
|
||||
'play_path': xpath_text(asset, 'fileName'),
|
||||
'format_id': 'rtmp-%s' % asset_type,
|
||||
'format_id': f'rtmp-{asset_type}',
|
||||
})
|
||||
formats.append(rtmp_format_info)
|
||||
return formats
|
||||
|
||||
@@ -52,8 +52,8 @@ class BrainPOPBaseIE(InfoExtractor):
|
||||
'%s': {},
|
||||
'ad_%s': {
|
||||
'format_note': 'Audio description',
|
||||
'source_preference': -2
|
||||
}
|
||||
'source_preference': -2,
|
||||
},
|
||||
}
|
||||
for additional_key_format, additional_key_fields in additional_key_formats.items():
|
||||
for key_quality, key_index in enumerate(('high', 'low')):
|
||||
@@ -62,7 +62,7 @@ class BrainPOPBaseIE(InfoExtractor):
|
||||
formats.extend(self._assemble_formats(data[full_key_index], full_key_index, display_id, token, {
|
||||
'quality': -1 - key_quality,
|
||||
**additional_key_fields,
|
||||
**extra_fields
|
||||
**extra_fields,
|
||||
}))
|
||||
return formats
|
||||
|
||||
@@ -72,7 +72,7 @@ class BrainPOPBaseIE(InfoExtractor):
|
||||
data=json.dumps({'username': username, 'password': password}).encode(),
|
||||
headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Referer': self._ORIGIN
|
||||
'Referer': self._ORIGIN,
|
||||
}, note='Logging in', errnote='Unable to log in', expected_status=400)
|
||||
status_code = int_or_none(login_res['status_code'])
|
||||
if status_code != 1505:
|
||||
@@ -131,12 +131,12 @@ class BrainPOPIE(BrainPOPBaseIE):
|
||||
formats, subtitles = [], {}
|
||||
formats.extend(self._extract_adaptive_formats(movie_feature_data, movie_feature_data.get('token', ''), display_id, '%s_v2', {
|
||||
'language': movie_feature.get('language') or 'en',
|
||||
'language_preference': 10
|
||||
'language_preference': 10,
|
||||
}))
|
||||
for lang, localized_feature in traverse_obj(movie_feature, 'localization', default={}, expected_type=dict).items():
|
||||
formats.extend(self._extract_adaptive_formats(localized_feature, localized_feature.get('token', ''), display_id, '%s_v2', {
|
||||
'language': lang,
|
||||
'language_preference': -10
|
||||
'language_preference': -10,
|
||||
}))
|
||||
|
||||
# TODO: Do localization fields also have subtitles?
|
||||
@@ -145,7 +145,7 @@ class BrainPOPIE(BrainPOPBaseIE):
|
||||
r'^subtitles_(?P<lang>\w+)$', name, 'subtitle metadata', default=None)
|
||||
if lang and url:
|
||||
subtitles.setdefault(lang, []).append({
|
||||
'url': urljoin(self._CDN_URL, url)
|
||||
'url': urljoin(self._CDN_URL, url),
|
||||
})
|
||||
|
||||
return {
|
||||
|
||||
@@ -185,5 +185,5 @@ class BravoTVIE(AdobePassIE):
|
||||
'episode_number': ('episodeNumber', {int_or_none}),
|
||||
'episode': 'episodeTitle',
|
||||
'series': 'show',
|
||||
}))
|
||||
})),
|
||||
}
|
||||
|
||||
@@ -13,7 +13,7 @@ class BreitBartIE(InfoExtractor):
|
||||
'description': 'md5:bac35eb0256d1cb17f517f54c79404d5',
|
||||
'thumbnail': 'https://cdn.jwplayer.com/thumbs/5cOz1yup-1920.jpg',
|
||||
'age_limit': 0,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.breitbart.com/videos/v/eaiZjVOn/',
|
||||
'only_matching': True,
|
||||
@@ -30,5 +30,5 @@ class BreitBartIE(InfoExtractor):
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'age_limit': self._rta_search(webpage),
|
||||
'formats': formats
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -1,15 +1,12 @@
|
||||
import base64
|
||||
import re
|
||||
import struct
|
||||
import urllib.parse
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_parse_qs,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..compat import compat_etree_fromstring
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
@@ -21,6 +18,7 @@ from ..utils import (
|
||||
fix_xml_ampersands,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
@@ -142,7 +140,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
# from http://www.un.org/chinese/News/story.asp?NewsID=27724
|
||||
'url': 'https://link.brightcove.com/services/player/bcpid1722935254001/?bctid=5360463607001&autoStart=false&secureConnections=true&width=650&height=350',
|
||||
'only_matching': True, # Tested in GenericIE
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
_WEBPAGE_TESTS = [{
|
||||
@@ -315,7 +313,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
object_str = fix_xml_ampersands(object_str)
|
||||
|
||||
try:
|
||||
object_doc = compat_etree_fromstring(object_str.encode('utf-8'))
|
||||
object_doc = compat_etree_fromstring(object_str.encode())
|
||||
except xml.etree.ElementTree.ParseError:
|
||||
return
|
||||
|
||||
@@ -323,7 +321,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
if fv_el is not None:
|
||||
flashvars = dict(
|
||||
(k, v[0])
|
||||
for k, v in compat_parse_qs(fv_el.attrib['value']).items())
|
||||
for k, v in urllib.parse.parse_qs(fv_el.attrib['value']).items())
|
||||
else:
|
||||
flashvars = {}
|
||||
|
||||
@@ -340,32 +338,32 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
|
||||
params = {}
|
||||
|
||||
playerID = find_param('playerID') or find_param('playerId')
|
||||
if playerID is None:
|
||||
player_id = find_param('playerID') or find_param('playerId')
|
||||
if player_id is None:
|
||||
raise ExtractorError('Cannot find player ID')
|
||||
params['playerID'] = playerID
|
||||
params['playerID'] = player_id
|
||||
|
||||
playerKey = find_param('playerKey')
|
||||
player_key = find_param('playerKey')
|
||||
# Not all pages define this value
|
||||
if playerKey is not None:
|
||||
params['playerKey'] = playerKey
|
||||
if player_key is not None:
|
||||
params['playerKey'] = player_key
|
||||
# These fields hold the id of the video
|
||||
videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList')
|
||||
if videoPlayer is not None:
|
||||
if isinstance(videoPlayer, list):
|
||||
videoPlayer = videoPlayer[0]
|
||||
videoPlayer = videoPlayer.strip()
|
||||
video_player = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList')
|
||||
if video_player is not None:
|
||||
if isinstance(video_player, list):
|
||||
video_player = video_player[0]
|
||||
video_player = video_player.strip()
|
||||
# UUID is also possible for videoPlayer (e.g.
|
||||
# http://www.popcornflix.com/hoodies-vs-hooligans/7f2d2b87-bbf2-4623-acfb-ea942b4f01dd
|
||||
# or http://www8.hp.com/cn/zh/home.html)
|
||||
if not (re.match(
|
||||
r'^(?:\d+|[\da-fA-F]{8}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{12})$',
|
||||
videoPlayer) or videoPlayer.startswith('ref:')):
|
||||
video_player) or video_player.startswith('ref:')):
|
||||
return None
|
||||
params['@videoPlayer'] = videoPlayer
|
||||
linkBase = find_param('linkBaseURL')
|
||||
if linkBase is not None:
|
||||
params['linkBaseURL'] = linkBase
|
||||
params['@videoPlayer'] = video_player
|
||||
link_base = find_param('linkBaseURL')
|
||||
if link_base is not None:
|
||||
params['linkBaseURL'] = link_base
|
||||
return cls._make_brightcove_url(params)
|
||||
|
||||
@classmethod
|
||||
@@ -389,7 +387,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
@classmethod
|
||||
def _make_brightcove_url(cls, params):
|
||||
return update_url_query(
|
||||
'http://c.brightcove.com/services/viewer/htmlFederated', params)
|
||||
'https://c.brightcove.com/services/viewer/htmlFederated', params)
|
||||
|
||||
@classmethod
|
||||
def _extract_brightcove_url(cls, webpage):
|
||||
@@ -448,13 +446,13 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
url = re.sub(r'(?<=[?&])bckey', 'playerKey', url)
|
||||
mobj = self._match_valid_url(url)
|
||||
query_str = mobj.group('query')
|
||||
query = compat_urlparse.parse_qs(query_str)
|
||||
query = urllib.parse.parse_qs(query_str)
|
||||
|
||||
videoPlayer = query.get('@videoPlayer')
|
||||
if videoPlayer:
|
||||
video_player = query.get('@videoPlayer')
|
||||
if video_player:
|
||||
# We set the original url as the default 'Referer' header
|
||||
referer = query.get('linkBaseURL', [None])[0] or smuggled_data.get('Referer', url)
|
||||
video_id = videoPlayer[0]
|
||||
video_id = video_player[0]
|
||||
if 'playerID' not in query:
|
||||
mobj = re.search(r'/bcpid(\d+)', url)
|
||||
if mobj is not None:
|
||||
@@ -473,7 +471,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
if referer:
|
||||
headers['Referer'] = referer
|
||||
player_page = self._download_webpage(
|
||||
'http://link.brightcove.com/services/player/bcpid' + player_id[0],
|
||||
'https://link.brightcove.com/services/player/bcpid' + player_id[0],
|
||||
video_id, headers=headers, fatal=False)
|
||||
if player_page:
|
||||
player_key = self._search_regex(
|
||||
@@ -483,7 +481,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
enc_pub_id = player_key.split(',')[1].replace('~', '=')
|
||||
publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
|
||||
if publisher_id:
|
||||
brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id)
|
||||
brightcove_new_url = f'https://players.brightcove.net/{publisher_id}/default_default/index.html?videoId={video_id}'
|
||||
if referer:
|
||||
brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer})
|
||||
return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
|
||||
@@ -541,12 +539,7 @@ class BrightcoveNewBaseIE(AdobePassIE):
|
||||
})
|
||||
|
||||
def build_format_id(kind):
|
||||
format_id = kind
|
||||
if tbr:
|
||||
format_id += '-%dk' % int(tbr)
|
||||
if height:
|
||||
format_id += '-%dp' % height
|
||||
return format_id
|
||||
return join_nonempty(kind, tbr and f'{int(tbr)}k', height and f'{height}p')
|
||||
|
||||
if src or streaming_src:
|
||||
f.update({
|
||||
@@ -654,7 +647,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# playlist stream
|
||||
'url': 'https://players.brightcove.net/1752604059001/S13cJdUBz_default/index.html?playlistId=5718313430001',
|
||||
@@ -666,7 +659,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=5743160747001',
|
||||
'only_matching': True,
|
||||
@@ -804,7 +797,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
|
||||
# Look for iframe embeds [1]
|
||||
for _, url in re.findall(
|
||||
r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
|
||||
entries.append(url if url.startswith('http') else 'http:' + url)
|
||||
entries.append(url if url.startswith(('http:', 'https:')) else 'https:' + url)
|
||||
|
||||
# Look for <video> tags [2] and embed_in_page embeds [3]
|
||||
# [2] looks like:
|
||||
@@ -833,8 +826,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
|
||||
player_id = player_id or attrs.get('data-player') or 'default'
|
||||
embed = embed or attrs.get('data-embed') or 'default'
|
||||
|
||||
bc_url = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' % (
|
||||
account_id, player_id, embed, video_id)
|
||||
bc_url = f'https://players.brightcove.net/{account_id}/{player_id}_{embed}/index.html?videoId={video_id}'
|
||||
|
||||
# Some brightcove videos may be embedded with video tag only and
|
||||
# without script tag or any mentioning of brightcove at all. Such
|
||||
@@ -865,13 +857,13 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
|
||||
|
||||
account_id, player_id, embed, content_type, video_id = self._match_valid_url(url).groups()
|
||||
|
||||
policy_key_id = '%s_%s' % (account_id, player_id)
|
||||
policy_key_id = f'{account_id}_{player_id}'
|
||||
policy_key = self.cache.load('brightcove', policy_key_id)
|
||||
policy_key_extracted = False
|
||||
store_pk = lambda x: self.cache.store('brightcove', policy_key_id, x)
|
||||
|
||||
def extract_policy_key():
|
||||
base_url = 'http://players.brightcove.net/%s/%s_%s/' % (account_id, player_id, embed)
|
||||
base_url = f'https://players.brightcove.net/{account_id}/{player_id}_{embed}/'
|
||||
config = self._download_json(
|
||||
base_url + 'config.json', video_id, fatal=False) or {}
|
||||
policy_key = try_get(
|
||||
@@ -910,7 +902,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
|
||||
if not policy_key:
|
||||
policy_key = extract_policy_key()
|
||||
policy_key_extracted = True
|
||||
headers['Accept'] = 'application/json;pk=%s' % policy_key
|
||||
headers['Accept'] = f'application/json;pk={policy_key}'
|
||||
try:
|
||||
json_data = self._download_json(api_url, video_id, headers=headers)
|
||||
break
|
||||
@@ -936,7 +928,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
|
||||
custom_fields['bcadobepassresourceid'])
|
||||
json_data = self._download_json(
|
||||
api_url, video_id, headers={
|
||||
'Accept': 'application/json;pk=%s' % policy_key
|
||||
'Accept': f'application/json;pk={policy_key}',
|
||||
}, query={
|
||||
'tveToken': tve_token,
|
||||
})
|
||||
|
||||
@@ -16,17 +16,17 @@ class BundesligaIE(InfoExtractor):
|
||||
'upload_date': '20220928',
|
||||
'duration': 146,
|
||||
'timestamp': 1664366511,
|
||||
'description': 'md5:803d4411bd134140c774021dd4b7598b'
|
||||
}
|
||||
'description': 'md5:803d4411bd134140c774021dd4b7598b',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://www.bundesliga.com/en/bundesliga/videos/latest-features/T8IKc8TX?vid=ROHjs06G',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://www.bundesliga.com/en/bundesliga/videos/goals?vid=mOG56vWA',
|
||||
'only_matching': True
|
||||
}
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -10,7 +10,7 @@ class BusinessInsiderIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'cjGDb0X9',
|
||||
'ext': 'mp4',
|
||||
'title': "Bananas give you more radiation exposure than living next to a nuclear power plant",
|
||||
'title': 'Bananas give you more radiation exposure than living next to a nuclear power plant',
|
||||
'description': 'md5:0175a3baf200dd8fa658f94cade841b3',
|
||||
'upload_date': '20160611',
|
||||
'timestamp': 1465675620,
|
||||
@@ -41,5 +41,5 @@ class BusinessInsiderIE(InfoExtractor):
|
||||
r'(?:jwplatform\.com/players/|jwplayer_)([a-zA-Z0-9]{8})'),
|
||||
webpage, 'jwplatform id')
|
||||
return self.url_result(
|
||||
'jwplatform:%s' % jwplatform_id, ie=JWPlatformIE.ie_key(),
|
||||
f'jwplatform:{jwplatform_id}', ie=JWPlatformIE.ie_key(),
|
||||
video_id=video_id)
|
||||
|
||||
@@ -23,8 +23,8 @@ class BuzzFeedIE(InfoExtractor):
|
||||
'upload_date': '20141024',
|
||||
'uploader_id': 'Buddhanz1',
|
||||
'uploader': 'Angry Ram',
|
||||
}
|
||||
}]
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
'url': 'http://www.buzzfeed.com/sheridanwatson/look-at-this-cute-dog-omg?utm_term=4ldqpia',
|
||||
'params': {
|
||||
@@ -45,7 +45,7 @@ class BuzzFeedIE(InfoExtractor):
|
||||
'uploader_id': 'CindysMunchkin',
|
||||
'uploader': 're:^Munchkin the',
|
||||
},
|
||||
}]
|
||||
}],
|
||||
}, {
|
||||
'url': 'http://www.buzzfeed.com/craigsilverman/the-most-adorable-crash-landing-ever#.eq7pX0BAmK',
|
||||
'info_dict': {
|
||||
|
||||
@@ -36,7 +36,7 @@ class BYUtvIE(InfoExtractor):
|
||||
'duration': 11645,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',
|
||||
|
||||
@@ -38,7 +38,7 @@ class C56IE(InfoExtractor):
|
||||
return self.url_result(sohu_video_info['url'], 'Sohu')
|
||||
|
||||
page = self._download_json(
|
||||
'http://vxml.56.com/json/%s/' % text_id, text_id, 'Downloading video info')
|
||||
f'http://vxml.56.com/json/{text_id}/', text_id, 'Downloading video info')
|
||||
|
||||
info = page['info']
|
||||
|
||||
@@ -46,7 +46,7 @@ class C56IE(InfoExtractor):
|
||||
{
|
||||
'format_id': f['type'],
|
||||
'filesize': int(f['filesize']),
|
||||
'url': f['url']
|
||||
'url': f['url'],
|
||||
} for f in info['rfiles']
|
||||
]
|
||||
|
||||
|
||||
@@ -29,8 +29,8 @@ class CallinIE(InfoExtractor):
|
||||
'series_id': '436d1f82ddeb30cd2306ea9156044d8d2cfdc3f1f1552d245117a42173e78553',
|
||||
'episode': 'The Title IX Regime and the Long March Through and Beyond the Institutions',
|
||||
'episode_number': 1,
|
||||
'episode_id': '218b979630a35ead12c6fd096f2996c56c37e4d0dc1f6dc0feada32dcf7b31cd'
|
||||
}
|
||||
'episode_id': '218b979630a35ead12c6fd096f2996c56c37e4d0dc1f6dc0feada32dcf7b31cd',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.callin.com/episode/fcc-commissioner-brendan-carr-on-elons-PrumRdSQJW',
|
||||
'md5': '14ede27ee2c957b7e4db93140fc0745c',
|
||||
@@ -54,7 +54,7 @@ class CallinIE(InfoExtractor):
|
||||
'thumbnail': 'https://d1z76fhpoqkd01.cloudfront.net/shows/legacy/1ade9142625344045dc17cf523469ced1d93610762f4c886d06aa190a2f979e8.png',
|
||||
'episode_id': 'c3dab47f237bf953d180d3f243477a84302798be0e0b29bc9ade6d60a69f04f5',
|
||||
'timestamp': 1662100688.005,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.callin.com/episode/episode-81-elites-melt-down-over-student-debt-lzxMidUnjA',
|
||||
'md5': '16f704ddbf82a27e3930533b12062f07',
|
||||
@@ -78,7 +78,7 @@ class CallinIE(InfoExtractor):
|
||||
'thumbnail': 'https://d1z76fhpoqkd01.cloudfront.net/shows/legacy/461ea0d86172cb6aff7d6c80fd49259cf5e64bdf737a4650f8bc24cf392ca218.png',
|
||||
'episode_id': '8d06f869798f93a7814e380bceabea72d501417e620180416ff6bd510596e83c',
|
||||
'timestamp': 1661476708.282,
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def try_get_user_name(self, d):
|
||||
@@ -94,7 +94,7 @@ class CallinIE(InfoExtractor):
|
||||
next_data = self._search_nextjs_data(webpage, display_id)
|
||||
episode = next_data['props']['pageProps']['episode']
|
||||
|
||||
id = episode['id']
|
||||
video_id = episode['id']
|
||||
title = episode.get('title') or self._generic_title('', webpage)
|
||||
url = episode['m3u8']
|
||||
formats = self._extract_m3u8_formats(url, display_id, ext='ts')
|
||||
@@ -125,11 +125,11 @@ class CallinIE(InfoExtractor):
|
||||
|
||||
episode_list = traverse_obj(show_json, ('pageProps', 'show', 'episodes')) or []
|
||||
episode_number = next(
|
||||
(len(episode_list) - i for (i, e) in enumerate(episode_list) if e.get('id') == id),
|
||||
(len(episode_list) - i for i, e in enumerate(episode_list) if e.get('id') == video_id),
|
||||
None)
|
||||
|
||||
return {
|
||||
'id': id,
|
||||
'id': video_id,
|
||||
'_old_archive_ids': [make_archive_id(self, display_id.rsplit('-', 1)[-1])],
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
@@ -151,5 +151,5 @@ class CallinIE(InfoExtractor):
|
||||
'series_id': show_id,
|
||||
'episode': title,
|
||||
'episode_number': episode_number,
|
||||
'episode_id': id
|
||||
'episode_id': video_id,
|
||||
}
|
||||
|
||||
@@ -11,7 +11,7 @@ class CaltransIE(InfoExtractor):
|
||||
'title': 'US-50 : Sacramento : Hwy 50 at 24th',
|
||||
'live_status': 'is_live',
|
||||
'thumbnail': 'https://cwwp2.dot.ca.gov/data/d3/cctv/image/hwy50at24th/hwy50at24th.jpg',
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -12,12 +12,12 @@ class CAM4IE(InfoExtractor):
|
||||
'age_limit': 18,
|
||||
'live_status': 'is_live',
|
||||
'thumbnail': 'https://snapshots.xcdnpro.com/thumbnails/foxynesss',
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
m3u8_playlist = self._download_json('https://www.cam4.com/rest/v1.0/profile/{}/streamInfo'.format(channel_id), channel_id).get('cdnURL')
|
||||
m3u8_playlist = self._download_json(f'https://www.cam4.com/rest/v1.0/profile/{channel_id}/streamInfo', channel_id).get('cdnURL')
|
||||
|
||||
formats = self._extract_m3u8_formats(m3u8_playlist, channel_id, 'mp4', m3u8_id='hls', live=True)
|
||||
|
||||
|
||||
@@ -1,10 +1,7 @@
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
parse_duration,
|
||||
@@ -28,7 +25,7 @@ class CamdemyIE(InfoExtractor):
|
||||
'duration': 1591,
|
||||
'upload_date': '20130114',
|
||||
'view_count': int,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# With non-empty description
|
||||
# webpage returns "No permission or not login"
|
||||
@@ -42,7 +39,7 @@ class CamdemyIE(InfoExtractor):
|
||||
'description': 'md5:2a9f989c2b153a2342acee579c6e7db6',
|
||||
'creator': 'evercam',
|
||||
'duration': 318,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# External source (YouTube)
|
||||
'url': 'http://www.camdemy.com/media/14842',
|
||||
@@ -76,12 +73,12 @@ class CamdemyIE(InfoExtractor):
|
||||
|
||||
title = oembed_obj['title']
|
||||
thumb_url = oembed_obj['thumbnail_url']
|
||||
video_folder = compat_urlparse.urljoin(thumb_url, 'video/')
|
||||
video_folder = urllib.parse.urljoin(thumb_url, 'video/')
|
||||
file_list_doc = self._download_xml(
|
||||
compat_urlparse.urljoin(video_folder, 'fileList.xml'),
|
||||
urllib.parse.urljoin(video_folder, 'fileList.xml'),
|
||||
video_id, 'Downloading filelist XML')
|
||||
file_name = file_list_doc.find('./video/item/fileName').text
|
||||
video_url = compat_urlparse.urljoin(video_folder, file_name)
|
||||
video_url = urllib.parse.urljoin(video_folder, file_name)
|
||||
|
||||
# Some URLs return "No permission or not login" in a webpage despite being
|
||||
# freely available via oembed JSON URL (e.g. http://www.camdemy.com/media/13885)
|
||||
@@ -117,35 +114,35 @@ class CamdemyFolderIE(InfoExtractor):
|
||||
'id': '450',
|
||||
'title': '信號與系統 2012 & 2011 (Signals and Systems)',
|
||||
},
|
||||
'playlist_mincount': 145
|
||||
'playlist_mincount': 145,
|
||||
}, {
|
||||
# links without trailing slash
|
||||
# and multi-page
|
||||
'url': 'http://www.camdemy.com/folder/853',
|
||||
'info_dict': {
|
||||
'id': '853',
|
||||
'title': '科學計算 - 使用 Matlab'
|
||||
'title': '科學計算 - 使用 Matlab',
|
||||
},
|
||||
'playlist_mincount': 20
|
||||
'playlist_mincount': 20,
|
||||
}, {
|
||||
# with displayMode parameter. For testing the codes to add parameters
|
||||
'url': 'http://www.camdemy.com/folder/853/?displayMode=defaultOrderByOrg',
|
||||
'info_dict': {
|
||||
'id': '853',
|
||||
'title': '科學計算 - 使用 Matlab'
|
||||
'title': '科學計算 - 使用 Matlab',
|
||||
},
|
||||
'playlist_mincount': 20
|
||||
'playlist_mincount': 20,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
folder_id = self._match_id(url)
|
||||
|
||||
# Add displayMode=list so that all links are displayed in a single page
|
||||
parsed_url = list(compat_urlparse.urlparse(url))
|
||||
query = dict(compat_urlparse.parse_qsl(parsed_url[4]))
|
||||
parsed_url = list(urllib.parse.urlparse(url))
|
||||
query = dict(urllib.parse.parse_qsl(parsed_url[4]))
|
||||
query.update({'displayMode': 'list'})
|
||||
parsed_url[4] = compat_urllib_parse_urlencode(query)
|
||||
final_url = compat_urlparse.urlunparse(parsed_url)
|
||||
parsed_url[4] = urllib.parse.urlencode(query)
|
||||
final_url = urllib.parse.urlunparse(parsed_url)
|
||||
|
||||
page = self._download_webpage(final_url, folder_id)
|
||||
matches = re.findall(r"href='(/media/\d+/?)'", page)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user