mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-12-21 23:48:57 +00:00

Merge remote-tracking branch 'origin' into yt-live-from-start-range

Author: Elyse
Date: 2023-10-08 00:06:56 -06:00
323 changed files with 13049 additions and 4722 deletions

View File

@@ -15,7 +15,6 @@ from .youtube import ( # Youtube is moved to the top to improve performance
YoutubeSearchURLIE,
YoutubeMusicSearchURLIE,
YoutubeSubscriptionsIE,
YoutubeStoriesIE,
YoutubeTruncatedIDIE,
YoutubeTruncatedURLIE,
YoutubeYtBeIE,
@@ -123,7 +122,6 @@ from .applepodcasts import ApplePodcastsIE
from .archiveorg import (
ArchiveOrgIE,
YoutubeWebArchiveIE,
VLiveWebArchiveIE,
)
from .arcpublishing import ArcPublishingIE
from .arkena import ArkenaIE
@@ -139,10 +137,6 @@ from .arte import (
ArteTVCategoryIE,
)
from .arnes import ArnesIE
from .asiancrush import (
AsianCrushIE,
AsianCrushPlaylistIE,
)
from .atresplayer import AtresPlayerIE
from .atscaleconf import AtScaleConfEventIE
from .atttechchannel import ATTTechChannelIE
@@ -166,6 +160,7 @@ from .awaan import (
AWAANLiveIE,
AWAANSeasonIE,
)
from .axs import AxsIE
from .azmedien import AZMedienIE
from .baidu import BaiduVideoIE
from .banbye import (
@@ -215,6 +210,7 @@ from .bild import BildIE
from .bilibili import (
BiliBiliIE,
BiliBiliBangumiIE,
BiliBiliBangumiSeasonIE,
BiliBiliBangumiMediaIE,
BiliBiliSearchIE,
BilibiliCategoryIE,
@@ -223,7 +219,11 @@ from .bilibili import (
BiliBiliPlayerIE,
BilibiliSpaceVideoIE,
BilibiliSpaceAudioIE,
BilibiliSpacePlaylistIE,
BilibiliCollectionListIE,
BilibiliSeriesListIE,
BilibiliFavoritesListIE,
BilibiliWatchlaterIE,
BilibiliPlaylistIE,
BiliIntlIE,
BiliIntlSeriesIE,
BiliLiveIE,
@@ -271,6 +271,10 @@ from .brightcove import (
BrightcoveLegacyIE,
BrightcoveNewIE,
)
from .brilliantpala import (
BrilliantpalaElearnIE,
BrilliantpalaClassesIE,
)
from .businessinsider import BusinessInsiderIE
from .bundesliga import BundesligaIE
from .buzzfeed import BuzzFeedIE
@@ -292,9 +296,11 @@ from .cammodels import CamModelsIE
from .camsoda import CamsodaIE
from .camtasia import CamtasiaEmbedIE
from .camwithher import CamWithHerIE
from .canal1 import Canal1IE
from .canalalpha import CanalAlphaIE
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .caracoltv import CaracolTvPlayIE
from .carambatv import (
CarambaTVIE,
CarambaTVPageIE,
@@ -303,6 +309,7 @@ from .cartoonnetwork import CartoonNetworkIE
from .cbc import (
CBCIE,
CBCPlayerIE,
CBCPlayerPlaylistIE,
CBCGemIE,
CBCGemPlaylistIE,
CBCGemLiveIE,
@@ -351,6 +358,10 @@ from .chirbit import (
from .cinchcast import CinchcastIE
from .cinemax import CinemaxIE
from .cinetecamilano import CinetecaMilanoIE
from .cineverse import (
CineverseIE,
CineverseDetailsIE,
)
from .ciscolive import (
CiscoLiveSessionIE,
CiscoLiveSearchIE,
@@ -560,8 +571,10 @@ from .epicon import (
EpiconIE,
EpiconSeriesIE,
)
from .eplus import EplusIbIE
from .epoch import EpochIE
from .eporner import EpornerIE
from .erocast import ErocastIE
from .eroprofile import (
EroProfileIE,
EroProfileAlbumIE,
@@ -939,6 +952,7 @@ from .lastfm import (
from .lbry import (
LBRYIE,
LBRYChannelIE,
LBRYPlaylistIE,
)
from .lci import LCIIE
from .lcp import (
@@ -1012,6 +1026,7 @@ from .lynda import (
LyndaCourseIE
)
from .m6 import M6IE
from .magellantv import MagellanTVIE
from .magentamusik360 import MagentaMusik360IE
from .mailru import (
MailRuIE,
@@ -1117,6 +1132,7 @@ from .mofosex import (
MofosexEmbedIE,
)
from .mojvideo import MojvideoIE
from .monstercat import MonstercatIE
from .morningstar import MorningstarIE
from .motherless import (
MotherlessIE,
@@ -1141,6 +1157,7 @@ from .mtv import (
)
from .muenchentv import MuenchenTVIE
from .murrtube import MurrtubeIE, MurrtubeUserIE
from .museai import MuseAIIE
from .musescore import MuseScoreIE
from .musicdex import (
MusicdexSongIE,
@@ -1288,6 +1305,11 @@ from .ninecninemedia import (
NineCNineMediaIE,
CPTwentyFourIE,
)
from .niconicochannelplus import (
NiconicoChannelPlusIE,
NiconicoChannelPlusChannelVideosIE,
NiconicoChannelPlusChannelLivesIE,
)
from .ninegag import NineGagIE
from .ninenow import NineNowIE
from .nintendo import NintendoIE
@@ -1418,7 +1440,7 @@ from .patreon import (
PatreonIE,
PatreonCampaignIE
)
from .pbs import PBSIE
from .pbs import PBSIE, PBSKidsIE
from .pearvideo import PearVideoIE
from .peekvids import PeekVidsIE, PlayVidsIE
from .peertube import (
@@ -1441,6 +1463,7 @@ from .philharmoniedeparis import PhilharmonieDeParisIE
from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE
from .piapro import PiaproIE
from .piaulizaportal import PIAULIZAPortalIE
from .picarto import (
PicartoIE,
PicartoVodIE,
@@ -1498,6 +1521,7 @@ from .polskieradio import (
from .popcorntimes import PopcorntimesIE
from .popcorntv import PopcornTVIE
from .porn91 import Porn91IE
from .pornbox import PornboxIE
from .porncom import PornComIE
from .pornflip import PornFlipIE
from .pornhd import PornHdIE
@@ -1516,7 +1540,7 @@ from .puhutv import (
PuhuTVIE,
PuhuTVSerieIE,
)
from .pr0gramm import Pr0grammStaticIE, Pr0grammIE
from .pr0gramm import Pr0grammIE
from .prankcast import PrankCastIE
from .premiershiprugby import PremiershipRugbyIE
from .presstv import PressTVIE
@@ -1531,6 +1555,7 @@ from .prx import (
)
from .puls4 import Puls4IE
from .pyvideo import PyvideoIE
from .qdance import QDanceIE
from .qingting import QingTingIE
from .qqmusic import (
QQMusicIE,
@@ -1551,7 +1576,14 @@ from .radiocanada import (
from .radiode import RadioDeIE
from .radiojavan import RadioJavanIE
from .radiobremen import RadioBremenIE
from .radiofrance import FranceCultureIE, RadioFranceIE
from .radiofrance import (
FranceCultureIE,
RadioFranceIE,
RadioFranceLiveIE,
RadioFrancePodcastIE,
RadioFranceProfileIE,
RadioFranceProgramScheduleIE,
)
from .radiozet import RadioZetPodcastIE
from .radiokapital import (
RadioKapitalIE,
@@ -1582,6 +1614,7 @@ from .rbmaradio import RBMARadioIE
from .rbgtum import (
RbgTumIE,
RbgTumCourseIE,
RbgTumNewCourseIE,
)
from .rcs import (
RCSIE,
@@ -1695,8 +1728,8 @@ from .megatvcom import (
MegaTVComIE,
MegaTVComEmbedIE,
)
from .ant1newsgr import (
Ant1NewsGrWatchIE,
from .antenna import (
AntennaGrWatchIE,
Ant1NewsGrArticleIE,
Ant1NewsGrEmbedIE,
)
@@ -1706,6 +1739,10 @@ from .ruv import (
RuvIE,
RuvSpilaIE
)
from .s4c import (
S4CIE,
S4CSeriesIE
)
from .safari import (
SafariIE,
SafariApiIE,
@@ -1786,7 +1823,10 @@ from .slideslive import SlidesLiveIE
from .slutload import SlutloadIE
from .smotrim import SmotrimIE
from .snotr import SnotrIE
from .sohu import SohuIE
from .sohu import (
SohuIE,
SohuVIE,
)
from .sonyliv import (
SonyLIVIE,
SonyLIVSeriesIE,
@@ -1854,6 +1894,10 @@ from .srgssr import (
SRGSSRPlayIE,
)
from .srmediathek import SRMediathekIE
from .stacommu import (
StacommuLiveIE,
StacommuVODIE,
)
from .stanfordoc import StanfordOpenClassroomIE
from .startv import StarTVIE
from .steam import (
@@ -1866,7 +1910,6 @@ from .storyfire import (
StoryFireSeriesIE,
)
from .streamable import StreamableIE
from .streamanity import StreamanityIE
from .streamcloud import StreamcloudIE
from .streamcz import StreamCZIE
from .streamff import StreamFFIE
@@ -1894,6 +1937,11 @@ from .sztvhu import SztvHuIE
from .tagesschau import TagesschauIE
from .tass import TassIE
from .tbs import TBSIE
from .tbsjp import (
TBSJPEpisodeIE,
TBSJPProgramIE,
TBSJPPlaylistIE,
)
from .tdslifeway import TDSLifewayIE
from .teachable import (
TeachableIE,
@@ -1956,10 +2004,6 @@ from .theplatform import (
)
from .thestar import TheStarIE
from .thesun import TheSunIE
from .theta import (
ThetaVideoIE,
ThetaStreamIE,
)
from .theweatherchannel import TheWeatherChannelIE
from .thisamericanlife import ThisAmericanLifeIE
from .thisav import ThisAVIE
@@ -2267,6 +2311,8 @@ from .vk import (
VKIE,
VKUserVideosIE,
VKWallPostIE,
VKPlayIE,
VKPlayLiveIE,
)
from .vocaroo import VocarooIE
from .vodlocker import VodlockerIE
@@ -2339,7 +2385,8 @@ from .webofstories import (
)
from .weibo import (
WeiboIE,
WeiboMobileIE
WeiboVideoIE,
WeiboUserIE,
)
from .weiqitv import WeiqiTVIE
from .weverse import (
@@ -2355,6 +2402,7 @@ from .weyyak import WeyyakIE
from .whyp import WhypIE
from .wikimedia import WikimediaIE
from .willow import WillowIE
from .wimbledon import WimbledonIE
from .wimtv import WimTVIE
from .whowatch import WhoWatchIE
from .wistia import (

View File

@@ -12,6 +12,7 @@ from ..utils import (
int_or_none,
parse_iso8601,
str_or_none,
traverse_obj,
try_get,
unescapeHTML,
update_url_query,
@@ -85,6 +86,15 @@ class ABCIE(InfoExtractor):
'uploader': 'Behind the News',
'uploader_id': 'behindthenews',
}
}, {
'url': 'https://www.abc.net.au/news/2023-06-25/wagner-boss-orders-troops-back-to-bases-to-avoid-bloodshed/102520540',
'info_dict': {
'id': '102520540',
'title': 'Wagner Group retreating from Russia, leader Prigozhin to move to Belarus',
'ext': 'mp4',
'description': 'Wagner troops leave Rostov-on-Don and\xa0Yevgeny Prigozhin will move to Belarus under a deal brokered by Belarusian President Alexander Lukashenko to end the mutiny.',
'thumbnail': 'https://live-production.wcms.abc-cdn.net.au/0c170f5b57f0105c432f366c0e8e267b?impolicy=wcms_crop_resize&cropH=2813&cropW=5000&xPos=0&yPos=249&width=862&height=485',
}
}]
def _real_extract(self, url):
@@ -107,7 +117,7 @@ class ABCIE(InfoExtractor):
video = True
if mobj is None:
mobj = re.search(r'(?P<type>)"sources": (?P<json_data>\[[^\]]+\]),', webpage)
mobj = re.search(r'(?P<type>)"(?:sources|files|renditions)":\s*(?P<json_data>\[[^\]]+\])', webpage)
if mobj is None:
mobj = re.search(
r'inline(?P<type>Video|Audio|YouTube)Data\.push\((?P<json_data>[^)]+)\);',
@@ -121,7 +131,8 @@ class ABCIE(InfoExtractor):
urls_info = self._parse_json(
mobj.group('json_data'), video_id, transform_source=js_to_json)
youtube = mobj.group('type') == 'YouTube'
video = mobj.group('type') == 'Video' or urls_info[0]['contentType'] == 'video/mp4'
video = mobj.group('type') == 'Video' or traverse_obj(
urls_info, (0, ('contentType', 'MIMEType')), get_all=False) == 'video/mp4'
if not isinstance(urls_info, list):
urls_info = [urls_info]
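
Note: the rewritten check above uses yt-dlp's traverse_obj; a tuple of keys inside the path branches over alternative key spellings, and get_all=False returns the first hit instead of a list of all matches. A minimal sketch with invented data:

from yt_dlp.utils import traverse_obj

urls_info = [{'MIMEType': 'video/mp4', 'src': 'https://example.invalid/clip.mp4'}]
# Tries 'contentType' on the first element, then falls back to 'MIMEType'
assert traverse_obj(urls_info, (0, ('contentType', 'MIMEType')), get_all=False) == 'video/mp4'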
@@ -169,20 +180,103 @@ class ABCIViewIE(InfoExtractor):
_VALID_URL = r'https?://iview\.abc\.net\.au/(?:[^/]+/)*video/(?P<id>[^/?#]+)'
_GEO_COUNTRIES = ['AU']
# ABC iview programs are normally available for 14 days only.
_TESTS = [{
'url': 'https://iview.abc.net.au/show/utopia/series/1/video/CO1211V001S00',
'md5': '52a942bfd7a0b79a6bfe9b4ce6c9d0ed',
'info_dict': {
'id': 'CO1211V001S00',
'ext': 'mp4',
'title': 'Series 1 Ep 1 Wood For The Trees',
'series': 'Utopia',
'description': 'md5:0cfb2c183c1b952d1548fd65c8a95c00',
'upload_date': '20230726',
'uploader_id': 'abc1',
'series_id': 'CO1211V',
'episode_id': 'CO1211V001S00',
'season_number': 1,
'season': 'Season 1',
'episode_number': 1,
'episode': 'Wood For The Trees',
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/co/CO1211V001S00_5ad8353f4df09_1280.jpg',
'timestamp': 1690403700,
},
'params': {
'skip_download': True,
},
}, {
'note': 'No episode name',
'url': 'https://iview.abc.net.au/show/gruen/series/11/video/LE1927H001S00',
'md5': '67715ce3c78426b11ba167d875ac6abf',
'info_dict': {
'id': 'LE1927H001S00',
'ext': 'mp4',
'title': "Series 11 Ep 1",
'series': "Gruen",
'title': 'Series 11 Ep 1',
'series': 'Gruen',
'description': 'md5:52cc744ad35045baf6aded2ce7287f67',
'upload_date': '20190925',
'uploader_id': 'abc1',
'series_id': 'LE1927H',
'episode_id': 'LE1927H001S00',
'season_number': 11,
'season': 'Season 11',
'episode_number': 1,
'episode': 'Episode 1',
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/le/LE1927H001S00_5d954fbd79e25_1280.jpg',
'timestamp': 1569445289,
},
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
'params': {
'skip_download': True,
},
}, {
'note': 'No episode number',
'url': 'https://iview.abc.net.au/show/four-corners/series/2022/video/NC2203H039S00',
'md5': '77cb7d8434440e3b28fbebe331c2456a',
'info_dict': {
'id': 'NC2203H039S00',
'ext': 'mp4',
'title': 'Series 2022 Locking Up Kids',
'series': 'Four Corners',
'description': 'md5:54829ca108846d1a70e1fcce2853e720',
'upload_date': '20221114',
'uploader_id': 'abc1',
'series_id': 'NC2203H',
'episode_id': 'NC2203H039S00',
'season_number': 2022,
'season': 'Season 2022',
'episode_number': None,
'episode': 'Locking Up Kids',
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/nc/NC2203H039S00_636d8a0944a22_1920.jpg',
'timestamp': 1668460497,
},
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
'params': {
'skip_download': True,
},
}, {
'note': 'No episode name or number',
'url': 'https://iview.abc.net.au/show/landline/series/2021/video/RF2004Q043S00',
'md5': '2e17dec06b13cc81dc119d2565289396',
'info_dict': {
'id': 'RF2004Q043S00',
'ext': 'mp4',
'title': 'Series 2021',
'series': 'Landline',
'description': 'md5:c9f30d9c0c914a7fd23842f6240be014',
'upload_date': '20211205',
'uploader_id': 'abc1',
'series_id': 'RF2004Q',
'episode_id': 'RF2004Q043S00',
'season_number': 2021,
'season': 'Season 2021',
'episode_number': None,
'episode': None,
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/rf/RF2004Q043S00_61a950639dbc0_1920.jpg',
'timestamp': 1638710705,
},
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
'params': {
'skip_download': True,
},
@@ -244,6 +338,8 @@ class ABCIViewIE(InfoExtractor):
'episode_number': int_or_none(self._search_regex(
r'\bEp\s+(\d+)\b', title, 'episode number', default=None)),
'episode_id': house_number,
'episode': self._search_regex(
r'^(?:Series\s+\d+)?\s*(?:Ep\s+\d+)?\s*(.*)$', title, 'episode', default='') or None,
'uploader_id': video_params.get('channel'),
'formats': formats,
'subtitles': subtitles,
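
Note: the added 'episode' field strips the optional "Series N" and "Ep N" prefixes from the title and falls back to None when nothing remains, which is what the "default='') or None" dance achieves. A standalone sketch of that regex:

import re

def episode_name(title):
    # Optional "Series N" and "Ep N" prefixes are consumed; group 1 is the rest
    m = re.search(r'^(?:Series\s+\d+)?\s*(?:Ep\s+\d+)?\s*(.*)$', title)
    return m.group(1) or None

assert episode_name('Series 1 Ep 1 Wood For The Trees') == 'Wood For The Trees'
assert episode_name('Series 2021') is None  # matches the Landline test above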

View File

@@ -12,7 +12,7 @@ import urllib.parse
import urllib.request
import urllib.response
import uuid
from ..utils.networking import clean_proxies
from .common import InfoExtractor
from ..aes import aes_ecb_decrypt
from ..utils import (
@@ -22,80 +22,26 @@ from ..utils import (
int_or_none,
intlist_to_bytes,
OnDemandPagedList,
request_to_url,
time_seconds,
traverse_obj,
update_url_query,
)
# NOTE: network handler related code is temporary thing until network stack overhaul PRs are merged (#2861/#2862)
def add_opener(ydl, handler):
''' Add a handler for opening URLs, like _download_webpage '''
def add_opener(ydl, handler): # FIXME: Create proper API in .networking
"""Add a handler for opening URLs, like _download_webpage"""
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
assert isinstance(ydl._opener, urllib.request.OpenerDirector)
ydl._opener.add_handler(handler)
def remove_opener(ydl, handler):
'''
Remove handler(s) for opening URLs
@param handler Either handler object itself or handler type.
Specifying handler type will remove all handler which isinstance returns True.
'''
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
opener = ydl._opener
assert isinstance(ydl._opener, urllib.request.OpenerDirector)
if isinstance(handler, (type, tuple)):
find_cp = lambda x: isinstance(x, handler)
else:
find_cp = lambda x: x is handler
removed = []
for meth in dir(handler):
if meth in ["redirect_request", "do_open", "proxy_open"]:
# oops, coincidental match
continue
i = meth.find("_")
protocol = meth[:i]
condition = meth[i + 1:]
if condition.startswith("error"):
j = condition.find("_") + i + 1
kind = meth[j + 1:]
try:
kind = int(kind)
except ValueError:
pass
lookup = opener.handle_error.get(protocol, {})
opener.handle_error[protocol] = lookup
elif condition == "open":
kind = protocol
lookup = opener.handle_open
elif condition == "response":
kind = protocol
lookup = opener.process_response
elif condition == "request":
kind = protocol
lookup = opener.process_request
else:
continue
handlers = lookup.setdefault(kind, [])
if handlers:
handlers[:] = [x for x in handlers if not find_cp(x)]
removed.append(x for x in handlers if find_cp(x))
if removed:
for x in opener.handlers:
if find_cp(x):
x.add_parent(None)
opener.handlers[:] = [x for x in opener.handlers if not find_cp(x)]
rh = ydl._request_director.handlers['Urllib']
if 'abematv-license' in rh._SUPPORTED_URL_SCHEMES:
return
headers = ydl.params['http_headers'].copy()
proxies = ydl.proxies.copy()
clean_proxies(proxies, headers)
opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=proxies)
assert isinstance(opener, urllib.request.OpenerDirector)
opener.add_handler(handler)
rh._SUPPORTED_URL_SCHEMES = (*rh._SUPPORTED_URL_SCHEMES, 'abematv-license')
class AbemaLicenseHandler(urllib.request.BaseHandler):
@@ -137,11 +83,11 @@ class AbemaLicenseHandler(urllib.request.BaseHandler):
return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))
def abematv_license_open(self, url):
url = request_to_url(url)
url = url.get_full_url() if isinstance(url, urllib.request.Request) else url
ticket = urllib.parse.urlparse(url).netloc
response_data = self._get_videokey_from_ticket(ticket)
return urllib.response.addinfourl(io.BytesIO(response_data), headers={
'Content-Length': len(response_data),
'Content-Length': str(len(response_data)),
}, url=url, code=200)
@@ -213,10 +159,7 @@ class AbemaTVBaseIE(InfoExtractor):
})
AbemaTVBaseIE._USERTOKEN = user_data['token']
# don't allow adding it 2 times or more, though it's guarded
remove_opener(self._downloader, AbemaLicenseHandler)
add_opener(self._downloader, AbemaLicenseHandler(self))
return self._USERTOKEN
def _get_media_token(self, invalidate=False, to_show=True):
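
Note: the rewritten add_opener registers the custom "abematv-license" scheme directly on the opener held by yt-dlp's Urllib request handler, since handler chains can no longer be reached through ydl._opener after the networking overhaul. A minimal standalone sketch of the underlying urllib mechanism, using an illustrative single-word "license" scheme (urllib's name-based registration maps a method named <scheme>_open to <scheme>:// URLs), not the real AbemaTV wiring:

import io
import urllib.request
import urllib.response

class LicenseHandler(urllib.request.BaseHandler):
    def license_open(self, req):  # dispatched for 'license://...' requests
        payload = b'decrypted-key-bytes'  # stand-in for the real key derivation
        return urllib.response.addinfourl(
            io.BytesIO(payload), headers={'Content-Length': str(len(payload))},
            url=req.get_full_url(), code=200)

opener = urllib.request.build_opener(LicenseHandler())
assert opener.open('license://ticket').read() == b'decrypted-key-bytes'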

View File

@@ -6,10 +6,8 @@ import random
from .common import InfoExtractor
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
from ..compat import (
compat_HTTPError,
compat_b64decode,
)
from ..compat import compat_b64decode
from ..networking.exceptions import HTTPError
from ..utils import (
ass_subtitles_timecode,
bytes_to_intlist,
@@ -142,9 +140,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
self._HEADERS = {'authorization': 'Bearer ' + access_token}
except ExtractorError as e:
message = None
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
resp = self._parse_json(
e.cause.read().decode(), None, fatal=False) or {}
e.cause.response.read().decode(), None, fatal=False) or {}
message = resp.get('message') or resp.get('code')
self.report_warning(message or self._LOGIN_ERR_MESSAGE)
@@ -195,14 +193,14 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
})
break
except ExtractorError as e:
if not isinstance(e.cause, compat_HTTPError):
if not isinstance(e.cause, HTTPError):
raise e
if e.cause.code == 401:
if e.cause.status == 401:
# This usually goes away with a different random pkcs1pad, so retry
continue
error = self._parse_json(e.cause.read(), video_id)
error = self._parse_json(e.cause.response.read(), video_id)
message = error.get('message')
if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
self.raise_geo_restricted(msg=message)
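
Note: this hunk is one instance of the tree-wide migration in this merge from urllib's compat_HTTPError to yt-dlp's networking.exceptions.HTTPError: .code becomes .status, and the body is read from .response rather than from the exception object itself. A small sketch of the new pattern:

from yt_dlp.networking.exceptions import HTTPError

def body_if_unauthorized(e):
    # e is an ExtractorError whose .cause may be a networking HTTPError
    if isinstance(e.cause, HTTPError) and e.cause.status == 401:
        return e.cause.response.read().decode()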

View File

@@ -2,11 +2,11 @@ import getpass
import json
import re
import time
import urllib.error
import xml.etree.ElementTree as etree
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..networking.exceptions import HTTPError
from ..utils import (
NO_DEFAULT,
ExtractorError,
@@ -1394,7 +1394,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
form_page, urlh = form_page_res
post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url')
if not re.match(r'https?://', post_url):
post_url = compat_urlparse.urljoin(urlh.geturl(), post_url)
post_url = compat_urlparse.urljoin(urlh.url, post_url)
form_data = self._hidden_inputs(form_page)
form_data.update(data)
return self._download_webpage_handle(
@@ -1473,7 +1473,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
elif 'automatically signed in with' in provider_redirect_page:
# Seems like comcast is rolling up new way of automatically signing customers
oauth_redirect_url = self._html_search_regex(
r'continue:\s*"(https://oauth.xfinity.com/oauth/authorize\?.+)"', provider_redirect_page,
r'continue:\s*"(https://oauth\.xfinity\.com/oauth/authorize\?.+)"', provider_redirect_page,
'oauth redirect (signed)')
# Just need to process the request. No useful data comes back
self._download_webpage(oauth_redirect_url, video_id, 'Confirming auto login')
@@ -1619,7 +1619,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
hidden_data['history'] = 1
provider_login_page_res = self._download_webpage_handle(
urlh.geturl(), video_id, 'Sending first bookend',
urlh.url, video_id, 'Sending first bookend',
query=hidden_data)
provider_association_redirect, urlh = post_form(
@@ -1629,7 +1629,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
})
provider_refresh_redirect_url = extract_redirect_url(
provider_association_redirect, url=urlh.geturl())
provider_association_redirect, url=urlh.url)
last_bookend_page, urlh = self._download_webpage_handle(
provider_refresh_redirect_url, video_id,
@@ -1638,7 +1638,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
hidden_data['history'] = 3
mvpd_confirm_page_res = self._download_webpage_handle(
urlh.geturl(), video_id, 'Sending final bookend',
urlh.url, video_id, 'Sending final bookend',
query=hidden_data)
post_form(mvpd_confirm_page_res, 'Confirming Login')
@@ -1652,7 +1652,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
hidden_data['history_val'] = 1
provider_login_redirect_page_res = self._download_webpage_handle(
urlh.geturl(), video_id, 'Sending First Bookend',
urlh.url, video_id, 'Sending First Bookend',
query=hidden_data)
provider_login_redirect_page, urlh = provider_login_redirect_page_res
@@ -1680,7 +1680,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
})
provider_refresh_redirect_url = extract_redirect_url(
provider_association_redirect, url=urlh.geturl())
provider_association_redirect, url=urlh.url)
last_bookend_page, urlh = self._download_webpage_handle(
provider_refresh_redirect_url, video_id,
@@ -1690,7 +1690,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
hidden_data['history_val'] = 3
mvpd_confirm_page_res = self._download_webpage_handle(
urlh.geturl(), video_id, 'Sending Final Bookend',
urlh.url, video_id, 'Sending Final Bookend',
query=hidden_data)
post_form(mvpd_confirm_page_res, 'Confirming Login')
@@ -1699,7 +1699,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
# based redirect that should be followed.
provider_redirect_page, urlh = provider_redirect_page_res
provider_refresh_redirect_url = extract_redirect_url(
provider_redirect_page, url=urlh.geturl())
provider_redirect_page, url=urlh.url)
if provider_refresh_redirect_url:
provider_redirect_page_res = self._download_webpage_handle(
provider_refresh_redirect_url, video_id,
@@ -1724,7 +1724,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
'requestor_id': requestor_id,
}), headers=mvpd_headers)
except ExtractorError as e:
if not mso_id and isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 401:
if not mso_id and isinstance(e.cause, HTTPError) and e.cause.status == 401:
raise_mvpd_required()
raise
if '<pendingLogout' in session:

View File

@@ -170,8 +170,10 @@ class AdultSwimIE(TurnerBaseIE):
continue
ext = determine_ext(asset_url, mimetype2ext(asset.get('mime_type')))
if ext == 'm3u8':
info['formats'].extend(self._extract_m3u8_formats(
asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
fmts, subs = self._extract_m3u8_formats_and_subtitles(
asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
info['formats'].extend(fmts)
self._merge_subtitles(subs, target=info['subtitles'])
elif ext == 'f4m':
continue
# info['formats'].extend(self._extract_f4m_formats(

View File

@@ -338,6 +338,7 @@ class BiographyIE(AENetworksBaseIE):
'skip_download': True,
},
'add_ie': ['ThePlatform'],
'skip': '404 Not Found',
}]
def _real_extract(self, url):

View File

@@ -22,8 +22,11 @@ class AmazonMiniTVBaseIE(InfoExtractor):
resp = self._download_json(
f'https://www.amazon.in/minitv/api/web/{"graphql" if data else "prs"}',
asin, note=note, headers={'Content-Type': 'application/json'},
data=json.dumps(data).encode() if data else None,
asin, note=note, headers={
'Content-Type': 'application/json',
'currentpageurl': '/',
'currentplatform': 'dWeb'
}, data=json.dumps(data).encode() if data else None,
query=None if data else {
'deviceType': 'A1WMMUXPCUJL4N',
'contentId': asin,
@@ -46,7 +49,7 @@ class AmazonMiniTVIE(AmazonMiniTVBaseIE):
'ext': 'mp4',
'title': 'May I Kiss You?',
'language': 'Hindi',
'thumbnail': r're:^https?://.*\.jpg$',
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
'description': 'md5:a549bfc747973e04feb707833474e59d',
'release_timestamp': 1644710400,
'release_date': '20220213',
@@ -68,7 +71,7 @@ class AmazonMiniTVIE(AmazonMiniTVBaseIE):
'ext': 'mp4',
'title': 'Jahaan',
'language': 'Hindi',
'thumbnail': r're:^https?://.*\.jpg',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'description': 'md5:05eb765a77bf703f322f120ec6867339',
'release_timestamp': 1647475200,
'release_date': '20220317',

View File

@@ -26,6 +26,7 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
# m3u8 download
'skip_download': True,
},
'skip': '404 Not Found',
}, {
'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
'only_matching': True,

View File

@@ -1,26 +1,30 @@
import urllib.parse
from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import (
HEADRequest,
ExtractorError,
determine_ext,
make_archive_id,
scale_thumbnails_to_max_format_width,
)
class Ant1NewsGrBaseIE(InfoExtractor):
class AntennaBaseIE(InfoExtractor):
def _download_and_extract_api_data(self, video_id, netloc, cid=None):
url = f'{self.http_scheme()}//{netloc}{self._API_PATH}'
info = self._download_json(url, video_id, query={'cid': cid or video_id})
try:
source = info['url']
except KeyError:
raise ExtractorError('no source found for %s' % video_id)
formats, subs = (self._extract_m3u8_formats_and_subtitles(source, video_id, 'mp4')
if determine_ext(source) == 'm3u8' else ([{'url': source}], {}))
info = self._download_json(f'{self.http_scheme()}//{netloc}{self._API_PATH}',
video_id, query={'cid': cid or video_id})
if not info.get('url'):
raise ExtractorError(f'No source found for {video_id}')
ext = determine_ext(info['url'])
if ext == 'm3u8':
formats, subs = self._extract_m3u8_formats_and_subtitles(info['url'], video_id, 'mp4')
else:
formats, subs = [{'url': info['url'], 'format_id': ext}], {}
thumbnails = scale_thumbnails_to_max_format_width(
formats, [{'url': info['thumb']}], r'(?<=/imgHandler/)\d+')
formats, [{'url': info['thumb']}], r'(?<=/imgHandler/)\d+') if info.get('thumb') else []
return {
'id': video_id,
'title': info.get('title'),
@@ -30,21 +34,31 @@ class Ant1NewsGrBaseIE(InfoExtractor):
}
class Ant1NewsGrWatchIE(Ant1NewsGrBaseIE):
IE_NAME = 'ant1newsgr:watch'
IE_DESC = 'ant1news.gr videos'
_VALID_URL = r'https?://(?P<netloc>(?:www\.)?ant1news\.gr)/watch/(?P<id>\d+)/'
class AntennaGrWatchIE(AntennaBaseIE):
IE_NAME = 'antenna:watch'
IE_DESC = 'antenna.gr and ant1news.gr videos'
_VALID_URL = r'https?://(?P<netloc>(?:www\.)?(?:antenna|ant1news)\.gr)/watch/(?P<id>\d+)/'
_API_PATH = '/templates/data/player'
_TESTS = [{
'url': 'https://www.ant1news.gr/watch/1506168/ant1-news-09112021-stis-18-45',
'md5': '95925e6b32106754235f2417e0d2dfab',
'md5': 'c472d9dd7cd233c63aff2ea42201cda6',
'info_dict': {
'id': '1506168',
'ext': 'mp4',
'title': 'md5:0ad00fa66ecf8aa233d26ab0dba7514a',
'description': 'md5:18665af715a6dcfeac1d6153a44f16b0',
'thumbnail': 'https://ant1media.azureedge.net/imgHandler/640/26d46bf6-8158-4f02-b197-7096c714b2de.jpg',
'thumbnail': r're:https://ant1media\.azureedge\.net/imgHandler/\d+/26d46bf6-8158-4f02-b197-7096c714b2de\.jpg',
},
}, {
'url': 'https://www.antenna.gr/watch/1643812/oi-prodotes-epeisodio-01',
'md5': '8f6f7dd3b1dba4d835ba990e25f31243',
'info_dict': {
'id': '1643812',
'ext': 'mp4',
'format_id': 'mp4',
'title': 'ΟΙ ΠΡΟΔΟΤΕΣ ΕΠΕΙΣΟΔΙΟ 01',
'thumbnail': r're:https://ant1media\.azureedge\.net/imgHandler/\d+/b3d63096-e72d-43c4-87a0-00d4363d242f\.jpg',
},
}]
@@ -52,11 +66,12 @@ class Ant1NewsGrWatchIE(Ant1NewsGrBaseIE):
video_id, netloc = self._match_valid_url(url).group('id', 'netloc')
webpage = self._download_webpage(url, video_id)
info = self._download_and_extract_api_data(video_id, netloc)
info['description'] = self._og_search_description(webpage)
info['description'] = self._og_search_description(webpage, default=None)
info['_old_archive_ids'] = [make_archive_id('Ant1NewsGrWatch', video_id)],
return info
class Ant1NewsGrArticleIE(Ant1NewsGrBaseIE):
class Ant1NewsGrArticleIE(AntennaBaseIE):
IE_NAME = 'ant1newsgr:article'
IE_DESC = 'ant1news.gr articles'
_VALID_URL = r'https?://(?:www\.)?ant1news\.gr/[^/]+/article/(?P<id>\d+)/'
@@ -96,7 +111,7 @@ class Ant1NewsGrArticleIE(Ant1NewsGrBaseIE):
video_kwargs={'url_transparent': True, 'timestamp': info.get('timestamp')})
class Ant1NewsGrEmbedIE(Ant1NewsGrBaseIE):
class Ant1NewsGrEmbedIE(AntennaBaseIE):
IE_NAME = 'ant1newsgr:embed'
IE_DESC = 'ant1news.gr embedded videos'
_BASE_PLAYER_URL_RE = r'(?:https?:)?//(?:[a-zA-Z0-9\-]+\.)?(?:antenna|ant1news)\.gr/templates/pages/player'
@@ -121,7 +136,7 @@ class Ant1NewsGrEmbedIE(Ant1NewsGrBaseIE):
canonical_url = self._request_webpage(
HEADRequest(url), video_id,
note='Resolve canonical player URL',
errnote='Could not resolve canonical player URL').geturl()
errnote='Could not resolve canonical player URL').url
_, netloc, _, _, query, _ = urllib.parse.urlparse(canonical_url)
cid = urllib.parse.parse_qs(query)['cid'][0]

View File

@@ -1,16 +1,15 @@
import json
import re
import urllib.error
import urllib.parse
from .common import InfoExtractor
from .naver import NaverBaseIE
from .youtube import YoutubeBaseInfoExtractor, YoutubeIE
from ..compat import compat_HTTPError, compat_urllib_parse_unquote
from ..compat import compat_urllib_parse_unquote
from ..networking import HEADRequest
from ..networking.exceptions import HTTPError
from ..utils import (
KNOWN_EXTENSIONS,
ExtractorError,
HEADRequest,
bug_reports_message,
clean_html,
dict_get,
@@ -899,7 +898,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
video_id, note='Fetching archived video file url', expected_status=True)
except ExtractorError as e:
# HTTP Error 404 is expected if the video is not saved.
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
if isinstance(e.cause, HTTPError) and e.cause.status == 404:
self.raise_no_formats(
'The requested video is not archived, indexed, or there is an issue with web.archive.org (try again later)', expected=True)
else:
@@ -926,7 +925,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
info['thumbnails'] = self._extract_thumbnails(video_id)
if urlh:
url = compat_urllib_parse_unquote(urlh.geturl())
url = compat_urllib_parse_unquote(urlh.url)
video_file_url_qs = parse_qs(url)
# Attempt to recover any ext & format info from playback url & response headers
format = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))}
@@ -947,237 +946,3 @@ class YoutubeWebArchiveIE(InfoExtractor):
if not info.get('title'):
info['title'] = video_id
return info
class VLiveWebArchiveIE(InfoExtractor):
IE_NAME = 'web.archive:vlive'
IE_DESC = 'web.archive.org saved vlive videos'
_VALID_URL = r'''(?x)
(?:https?://)?web\.archive\.org/
(?:web/)?(?:(?P<date>[0-9]{14})?[0-9A-Za-z_*]*/)? # /web and the version index is optional
(?:https?(?::|%3[Aa])//)?(?:
(?:(?:www|m)\.)?vlive\.tv(?::(?:80|443))?/(?:video|embed)/(?P<id>[0-9]+) # VLive URL
)
'''
_TESTS = [{
'url': 'https://web.archive.org/web/20221221144331/http://www.vlive.tv/video/1326',
'md5': 'cc7314812855ce56de70a06a27314983',
'info_dict': {
'id': '1326',
'ext': 'mp4',
'title': "Girl's Day's Broadcast",
'creator': "Girl's Day",
'view_count': int,
'uploader_id': 'muploader_a',
'uploader_url': None,
'uploader': None,
'upload_date': '20150817',
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
'timestamp': 1439816449,
'like_count': int,
'channel': 'Girl\'s Day',
'channel_id': 'FDF27',
'comment_count': int,
'release_timestamp': 1439818140,
'release_date': '20150817',
'duration': 1014,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://web.archive.org/web/20221221182103/http://www.vlive.tv/video/16937',
'info_dict': {
'id': '16937',
'ext': 'mp4',
'title': '첸백시 걍방',
'creator': 'EXO',
'view_count': int,
'subtitles': 'mincount:12',
'uploader_id': 'muploader_j',
'uploader_url': 'http://vlive.tv',
'uploader': None,
'upload_date': '20161112',
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
'timestamp': 1478923074,
'like_count': int,
'channel': 'EXO',
'channel_id': 'F94BD',
'comment_count': int,
'release_timestamp': 1478924280,
'release_date': '20161112',
'duration': 906,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://web.archive.org/web/20221127190050/http://www.vlive.tv/video/101870',
'info_dict': {
'id': '101870',
'ext': 'mp4',
'title': '[ⓓ xV] “레벨이들 매력에 반해? 안 반해?” 움직이는 HD 포토 (레드벨벳:Red Velvet)',
'creator': 'Dispatch',
'view_count': int,
'subtitles': 'mincount:6',
'uploader_id': 'V__FRA08071',
'uploader_url': 'http://vlive.tv',
'uploader': None,
'upload_date': '20181130',
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
'timestamp': 1543601327,
'like_count': int,
'channel': 'Dispatch',
'channel_id': 'C796F3',
'comment_count': int,
'release_timestamp': 1543601040,
'release_date': '20181130',
'duration': 279,
},
'params': {
'skip_download': True,
},
}]
# The wayback machine has special timestamp and "mode" values:
# timestamp:
# 1 = the first capture
# 2 = the last capture
# mode:
# id_ = Identity - perform no alterations of the original resource, return it as it was archived.
_WAYBACK_BASE_URL = 'https://web.archive.org/web/2id_/'
def _download_archived_page(self, url, video_id, *, timestamp='2', **kwargs):
for retry in self.RetryManager():
try:
return self._download_webpage(f'https://web.archive.org/web/{timestamp}id_/{url}', video_id, **kwargs)
except ExtractorError as e:
if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 404:
raise ExtractorError('Page was not archived', expected=True)
retry.error = e
continue
def _download_archived_json(self, url, video_id, **kwargs):
page = self._download_archived_page(url, video_id, **kwargs)
if not page:
raise ExtractorError('Page was not archived', expected=True)
else:
return self._parse_json(page, video_id)
def _extract_formats_from_m3u8(self, m3u8_url, params, video_id):
m3u8_doc = self._download_archived_page(m3u8_url, video_id, note='Downloading m3u8', query=params, fatal=False)
if not m3u8_doc:
return
# M3U8 document should be changed to archive domain
m3u8_doc = m3u8_doc.splitlines()
url_base = m3u8_url.rsplit('/', 1)[0]
first_segment = None
for i, line in enumerate(m3u8_doc):
if not line.startswith('#'):
m3u8_doc[i] = f'{self._WAYBACK_BASE_URL}{url_base}/{line}?{urllib.parse.urlencode(params)}'
first_segment = first_segment or m3u8_doc[i]
# Segments may not have been archived. See https://web.archive.org/web/20221127190050/http://www.vlive.tv/video/101870
urlh = self._request_webpage(HEADRequest(first_segment), video_id, errnote=False,
fatal=False, note='Check first segment availablity')
if urlh:
formats, subtitles = self._parse_m3u8_formats_and_subtitles('\n'.join(m3u8_doc), ext='mp4', video_id=video_id)
if subtitles:
self._report_ignoring_subs('m3u8')
return formats
# Closely follows the logic of the ArchiveTeam grab script
# See: https://github.com/ArchiveTeam/vlive-grab/blob/master/vlive.lua
def _real_extract(self, url):
video_id, url_date = self._match_valid_url(url).group('id', 'date')
webpage = self._download_archived_page(f'https://www.vlive.tv/video/{video_id}', video_id, timestamp=url_date)
player_info = self._search_json(r'__PRELOADED_STATE__\s*=', webpage, 'player info', video_id)
user_country = traverse_obj(player_info, ('common', 'userCountry'))
main_script_url = self._search_regex(r'<script\s+src="([^"]+/js/main\.[^"]+\.js)"', webpage, 'main script url')
main_script = self._download_archived_page(main_script_url, video_id, note='Downloading main script')
app_id = self._search_regex(r'appId\s*=\s*"([^"]+)"', main_script, 'app id')
inkey = self._download_archived_json(
f'https://www.vlive.tv/globalv-web/vam-web/video/v1.0/vod/{video_id}/inkey', video_id, note='Fetching inkey', query={
'appId': app_id,
'platformType': 'PC',
'gcc': user_country,
'locale': 'en_US',
}, fatal=False)
vod_id = traverse_obj(player_info, ('postDetail', 'post', 'officialVideo', 'vodId'))
vod_data = self._download_archived_json(
f'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{vod_id}', video_id, note='Fetching vod data', query={
'key': inkey.get('inkey'),
'pid': 'rmcPlayer_16692457559726800', # partially unix time and partially random. Fixed value used by archiveteam project
'sid': '2024',
'ver': '2.0',
'devt': 'html5_pc',
'doct': 'json',
'ptc': 'https',
'sptc': 'https',
'cpt': 'vtt',
'ctls': '%7B%22visible%22%3A%7B%22fullscreen%22%3Atrue%2C%22logo%22%3Afalse%2C%22playbackRate%22%3Afalse%2C%22scrap%22%3Afalse%2C%22playCount%22%3Atrue%2C%22commentCount%22%3Atrue%2C%22title%22%3Atrue%2C%22writer%22%3Atrue%2C%22expand%22%3Afalse%2C%22subtitles%22%3Atrue%2C%22thumbnails%22%3Atrue%2C%22quality%22%3Atrue%2C%22setting%22%3Atrue%2C%22script%22%3Afalse%2C%22logoDimmed%22%3Atrue%2C%22badge%22%3Atrue%2C%22seekingTime%22%3Atrue%2C%22muted%22%3Atrue%2C%22muteButton%22%3Afalse%2C%22viewerNotice%22%3Afalse%2C%22linkCount%22%3Afalse%2C%22createTime%22%3Afalse%2C%22thumbnail%22%3Atrue%7D%2C%22clicked%22%3A%7B%22expand%22%3Afalse%2C%22subtitles%22%3Afalse%7D%7D',
'pv': '4.26.9',
'dr': '1920x1080',
'cpl': 'en_US',
'lc': 'en_US',
'adi': '%5B%7B%22type%22%3A%22pre%22%2C%22exposure%22%3Afalse%2C%22replayExposure%22%3Afalse%7D%5D',
'adu': '%2F',
'videoId': vod_id,
'cc': user_country,
})
formats = []
streams = traverse_obj(vod_data, ('streams', ...))
if len(streams) > 1:
self.report_warning('Multiple streams found. Only the first stream will be downloaded.')
stream = streams[0]
max_stream = max(
stream.get('videos') or [],
key=lambda v: traverse_obj(v, ('bitrate', 'video'), default=0), default=None)
if max_stream is not None:
params = {arg.get('name'): arg.get('value') for arg in stream.get('keys', []) if arg.get('type') == 'param'}
formats = self._extract_formats_from_m3u8(max_stream.get('source'), params, video_id) or []
# For parts of the project MP4 files were archived
max_video = max(
traverse_obj(vod_data, ('videos', 'list', ...)),
key=lambda v: traverse_obj(v, ('bitrate', 'video'), default=0), default=None)
if max_video is not None:
video_url = self._WAYBACK_BASE_URL + max_video.get('source')
urlh = self._request_webpage(HEADRequest(video_url), video_id, errnote=False,
fatal=False, note='Check video availablity')
if urlh:
formats.append({'url': video_url})
return {
'id': video_id,
'formats': formats,
**traverse_obj(player_info, ('postDetail', 'post', {
'title': ('officialVideo', 'title', {str}),
'creator': ('author', 'nickname', {str}),
'channel': ('channel', 'channelName', {str}),
'channel_id': ('channel', 'channelCode', {str}),
'duration': ('officialVideo', 'playTime', {int_or_none}),
'view_count': ('officialVideo', 'playCount', {int_or_none}),
'like_count': ('officialVideo', 'likeCount', {int_or_none}),
'comment_count': ('officialVideo', 'commentCount', {int_or_none}),
'timestamp': ('officialVideo', 'createdAt', {lambda x: int_or_none(x, scale=1000)}),
'release_timestamp': ('officialVideo', 'willStartAt', {lambda x: int_or_none(x, scale=1000)}),
})),
**traverse_obj(vod_data, ('meta', {
'uploader_id': ('user', 'id', {str}),
'uploader': ('user', 'name', {str}),
'uploader_url': ('user', 'url', {url_or_none}),
'thumbnail': ('cover', 'source', {url_or_none}),
}), expected_type=lambda x: x or None),
**NaverBaseIE.process_subtitles(vod_data, lambda x: [self._WAYBACK_BASE_URL + x]),
}

View File

@@ -169,7 +169,7 @@ class ArteTVIE(ArteTVBaseIE):
)))
short_label = traverse_obj(stream_version, 'shortLabel', expected_type=str, default='?')
if stream['protocol'].startswith('HLS'):
if 'HLS' in stream['protocol']:
fmts, subs = self._extract_m3u8_formats_and_subtitles(
stream['url'], video_id=video_id, ext='mp4', m3u8_id=stream_version_code, fatal=False)
for fmt in fmts:

View File

@@ -1,196 +0,0 @@
import functools
import re
from .common import InfoExtractor
from .kaltura import KalturaIE
from ..utils import (
extract_attributes,
int_or_none,
OnDemandPagedList,
parse_age_limit,
strip_or_none,
try_get,
)
class AsianCrushBaseIE(InfoExtractor):
_VALID_URL_BASE = r'https?://(?:www\.)?(?P<host>(?:(?:asiancrush|yuyutv|midnightpulp)\.com|(?:cocoro|retrocrush)\.tv))'
_KALTURA_KEYS = [
'video_url', 'progressive_url', 'download_url', 'thumbnail_url',
'widescreen_thumbnail_url', 'screencap_widescreen',
]
_API_SUFFIX = {'retrocrush.tv': '-ott'}
def _call_api(self, host, endpoint, video_id, query, resource):
return self._download_json(
'https://api%s.%s/%s' % (self._API_SUFFIX.get(host, ''), host, endpoint), video_id,
'Downloading %s JSON metadata' % resource, query=query,
headers=self.geo_verification_headers())['objects']
def _download_object_data(self, host, object_id, resource):
return self._call_api(
host, 'search', object_id, {'id': object_id}, resource)[0]
def _get_object_description(self, obj):
return strip_or_none(obj.get('long_description') or obj.get('short_description'))
def _parse_video_data(self, video):
title = video['name']
entry_id, partner_id = [None] * 2
for k in self._KALTURA_KEYS:
k_url = video.get(k)
if k_url:
mobj = re.search(r'/p/(\d+)/.+?/entryId/([^/]+)/', k_url)
if mobj:
partner_id, entry_id = mobj.groups()
break
meta_categories = try_get(video, lambda x: x['meta']['categories'], list) or []
categories = list(filter(None, [c.get('name') for c in meta_categories]))
show_info = video.get('show_info') or {}
return {
'_type': 'url_transparent',
'url': 'kaltura:%s:%s' % (partner_id, entry_id),
'ie_key': KalturaIE.ie_key(),
'id': entry_id,
'title': title,
'description': self._get_object_description(video),
'age_limit': parse_age_limit(video.get('mpaa_rating') or video.get('tv_rating')),
'categories': categories,
'series': show_info.get('show_name'),
'season_number': int_or_none(show_info.get('season_num')),
'season_id': show_info.get('season_id'),
'episode_number': int_or_none(show_info.get('episode_num')),
}
class AsianCrushIE(AsianCrushBaseIE):
_VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' % AsianCrushBaseIE._VALID_URL_BASE
_TESTS = [{
'url': 'https://www.asiancrush.com/video/004289v/women-who-flirt',
'md5': 'c3b740e48d0ba002a42c0b72857beae6',
'info_dict': {
'id': '1_y4tmjm5r',
'ext': 'mp4',
'title': 'Women Who Flirt',
'description': 'md5:b65c7e0ae03a85585476a62a186f924c',
'timestamp': 1496936429,
'upload_date': '20170608',
'uploader_id': 'craig@crifkin.com',
'age_limit': 13,
'categories': 'count:5',
'duration': 5812,
},
}, {
'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
'only_matching': True,
}, {
'url': 'https://www.yuyutv.com/video/013886v/the-act-of-killing/',
'only_matching': True,
}, {
'url': 'https://www.yuyutv.com/video/peep-show/013922v-warring-factions/',
'only_matching': True,
}, {
'url': 'https://www.midnightpulp.com/video/010400v/drifters/',
'only_matching': True,
}, {
'url': 'https://www.midnightpulp.com/video/mononoke/016378v-zashikiwarashi-part-1/',
'only_matching': True,
}, {
'url': 'https://www.cocoro.tv/video/the-wonderful-wizard-of-oz/008878v-the-wonderful-wizard-of-oz-ep01/',
'only_matching': True,
}, {
'url': 'https://www.retrocrush.tv/video/true-tears/012328v-i...gave-away-my-tears',
'only_matching': True,
}]
def _real_extract(self, url):
host, video_id = self._match_valid_url(url).groups()
if host == 'cocoro.tv':
webpage = self._download_webpage(url, video_id)
embed_vars = self._parse_json(self._search_regex(
r'iEmbedVars\s*=\s*({.+?})', webpage, 'embed vars',
default='{}'), video_id, fatal=False) or {}
video_id = embed_vars.get('entry_id') or video_id
video = self._download_object_data(host, video_id, 'video')
return self._parse_video_data(video)
class AsianCrushPlaylistIE(AsianCrushBaseIE):
_VALID_URL = r'%s/series/0+(?P<id>\d+)s\b' % AsianCrushBaseIE._VALID_URL_BASE
_TESTS = [{
'url': 'https://www.asiancrush.com/series/006447s/fruity-samurai',
'info_dict': {
'id': '6447',
'title': 'Fruity Samurai',
'description': 'md5:7535174487e4a202d3872a7fc8f2f154',
},
'playlist_count': 13,
}, {
'url': 'https://www.yuyutv.com/series/013920s/peep-show/',
'only_matching': True,
}, {
'url': 'https://www.midnightpulp.com/series/016375s/mononoke/',
'only_matching': True,
}, {
'url': 'https://www.cocoro.tv/series/008549s/the-wonderful-wizard-of-oz/',
'only_matching': True,
}, {
'url': 'https://www.retrocrush.tv/series/012355s/true-tears',
'only_matching': True,
}]
_PAGE_SIZE = 1000000000
def _fetch_page(self, domain, parent_id, page):
videos = self._call_api(
domain, 'getreferencedobjects', parent_id, {
'max': self._PAGE_SIZE,
'object_type': 'video',
'parent_id': parent_id,
'start': page * self._PAGE_SIZE,
}, 'page %d' % (page + 1))
for video in videos:
yield self._parse_video_data(video)
def _real_extract(self, url):
host, playlist_id = self._match_valid_url(url).groups()
if host == 'cocoro.tv':
webpage = self._download_webpage(url, playlist_id)
entries = []
for mobj in re.finditer(
r'<a[^>]+href=(["\'])(?P<url>%s.*?)\1[^>]*>' % AsianCrushIE._VALID_URL,
webpage):
attrs = extract_attributes(mobj.group(0))
if attrs.get('class') == 'clearfix':
entries.append(self.url_result(
mobj.group('url'), ie=AsianCrushIE.ie_key()))
title = self._html_search_regex(
r'(?s)<h1\b[^>]\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
'title', default=None) or self._og_search_title(
webpage, default=None) or self._html_search_meta(
'twitter:title', webpage, 'title',
default=None) or self._html_extract_title(webpage)
if title:
title = re.sub(r'\s*\|\s*.+?$', '', title)
description = self._og_search_description(
webpage, default=None) or self._html_search_meta(
'twitter:description', webpage, 'description', fatal=False)
else:
show = self._download_object_data(host, playlist_id, 'show')
title = show.get('name')
description = self._get_object_description(show)
entries = OnDemandPagedList(
functools.partial(self._fetch_page, host, playlist_id),
self._PAGE_SIZE)
return self.playlist_result(entries, playlist_id, title, description)

View File

@@ -1,5 +1,5 @@
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
int_or_none,
@@ -34,8 +34,8 @@ class AtresPlayerIE(InfoExtractor):
_API_BASE = 'https://api.atresplayer.com/'
def _handle_error(self, e, code):
if isinstance(e.cause, compat_HTTPError) and e.cause.code == code:
error = self._parse_json(e.cause.read(), None)
if isinstance(e.cause, HTTPError) and e.cause.status == code:
error = self._parse_json(e.cause.response.read(), None)
if error.get('error') == 'required_registered':
self.raise_login_required()
raise ExtractorError(error['error_description'], expected=True)

View File

@@ -12,7 +12,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
def _aws_execute_api(self, aws_dict, video_id, query=None):
query = query or {}
amz_date = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
amz_date = datetime.datetime.now(datetime.timezone.utc).strftime('%Y%m%dT%H%M%SZ')
date = amz_date[:8]
headers = {
'Accept': 'application/json',
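
Note: datetime.datetime.utcnow() is deprecated since Python 3.12; the replacement is timezone-aware and yields identical strftime output here because the format string hard-codes the trailing 'Z':

import datetime

amz_date = datetime.datetime.now(datetime.timezone.utc).strftime('%Y%m%dT%H%M%SZ')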

yt_dlp/extractor/axs.py Normal file
View File

@@ -0,0 +1,87 @@
from .common import InfoExtractor
from ..utils import (
float_or_none,
js_to_json,
parse_iso8601,
traverse_obj,
url_or_none,
)
class AxsIE(InfoExtractor):
IE_NAME = 'axs.tv'
_VALID_URL = r'https?://(?:www\.)?axs\.tv/(?:channel/(?:[^/?#]+/)+)?video/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.axs.tv/video/5f4dc776b70e4f1c194f22ef/',
'md5': '8d97736ae8e50c64df528e5e676778cf',
'info_dict': {
'id': '5f4dc776b70e4f1c194f22ef',
'title': 'Small Town',
'ext': 'mp4',
'description': 'md5:e314d28bfaa227a4d7ec965fae19997f',
'upload_date': '20230602',
'timestamp': 1685729564,
'duration': 1284.216,
'series': 'Rock & Roll Road Trip with Sammy Hagar',
'season': 2,
'episode': '3',
'thumbnail': 'https://images.dotstudiopro.com/5f4e9d330a0c3b295a7e8394',
},
}, {
'url': 'https://www.axs.tv/channel/rock-star-interview/video/daryl-hall',
'md5': '300ae795cd8f9984652c0949734ffbdc',
'info_dict': {
'id': '5f488148b70e4f392572977c',
'display_id': 'daryl-hall',
'title': 'Daryl Hall',
'ext': 'mp4',
'description': 'md5:e54ecaa0f4b5683fc9259e9e4b196628',
'upload_date': '20230214',
'timestamp': 1676403615,
'duration': 2570.668,
'series': 'The Big Interview with Dan Rather',
'season': 3,
'episode': '5',
'thumbnail': 'https://images.dotstudiopro.com/5f4d1901f340b50d937cec32',
},
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
webpage_json_data = self._search_json(
r'mountObj\s*=', webpage, 'video ID data', display_id,
transform_source=js_to_json)
video_id = webpage_json_data['video_id']
company_id = webpage_json_data['company_id']
meta = self._download_json(
f'https://api.myspotlight.tv/dotplayer/video/{company_id}/{video_id}',
video_id, query={'device_type': 'desktop_web'})['video']
formats = self._extract_m3u8_formats(
meta['video_m3u8'], video_id, 'mp4', m3u8_id='hls')
subtitles = {}
for cc in traverse_obj(meta, ('closeCaption', lambda _, v: url_or_none(v['srtPath']))):
subtitles.setdefault(cc.get('srtShortLang') or 'en', []).append(
{'ext': cc.get('srtExt'), 'url': cc['srtPath']})
return {
'id': video_id,
'display_id': display_id,
'formats': formats,
**traverse_obj(meta, {
'title': ('title', {str}),
'description': ('description', {str}),
'series': ('seriestitle', {str}),
'season': ('season', {int}),
'episode': ('episode', {str}),
'duration': ('duration', {float_or_none}),
'timestamp': ('updated_at', {parse_iso8601}),
'thumbnail': ('thumb', {url_or_none}),
}),
'subtitles': subtitles,
}
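
Note: the subtitle loop above leans on traverse_obj's callable-filter idiom: the lambda keeps only closeCaption entries whose 'srtPath' is a valid URL, and exceptions raised inside the lambda (e.g. a missing key) silently discard that entry. A sketch with invented data:

from yt_dlp.utils import traverse_obj, url_or_none

meta = {'closeCaption': [
    {'srtPath': 'https://example.invalid/en.srt', 'srtShortLang': 'en', 'srtExt': 'srt'},
    {'srtPath': 'not a url'},  # dropped by the filter
]}
subtitles = {}
for cc in traverse_obj(meta, ('closeCaption', lambda _, v: url_or_none(v['srtPath']))):
    subtitles.setdefault(cc.get('srtShortLang') or 'en', []).append(
        {'ext': cc.get('srtExt'), 'url': cc['srtPath']})
assert 'en' in subtitles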

View File

@@ -31,7 +31,7 @@ class BanByeBaseIE(InfoExtractor):
class BanByeIE(BanByeBaseIE):
_VALID_URL = r'https?://(?:www\.)?banbye.com/(?:en/)?watch/(?P<id>\w+)'
_VALID_URL = r'https?://(?:www\.)?banbye.com/(?:en/)?watch/(?P<id>[\w-]+)'
_TESTS = [{
'url': 'https://banbye.com/watch/v_ytfmvkVYLE8T',
'md5': '2f4ea15c5ca259a73d909b2cfd558eb5',
@@ -59,7 +59,27 @@ class BanByeIE(BanByeBaseIE):
'title': 'Krzysztof Karoń',
'id': 'p_Ld82N6gBw_OJ',
},
'playlist_count': 9,
'playlist_mincount': 9,
}, {
'url': 'https://banbye.com/watch/v_kb6_o1Kyq-CD',
'info_dict': {
'id': 'v_kb6_o1Kyq-CD',
'ext': 'mp4',
'title': 'Co tak naprawdę dzieje się we Francji?! Czy Warszawa a potem cała Polska będzie drugim Paryżem?!🤔🇵🇱',
'description': 'md5:82be4c0e13eae8ea1ca8b9f2e07226a8',
'uploader': 'Marcin Rola - MOIM ZDANIEM!🇵🇱',
'channel_id': 'ch_QgWnHvDG2fo5',
'channel_url': 'https://banbye.com/channel/ch_QgWnHvDG2fo5',
'duration': 597,
'timestamp': 1688642656,
'upload_date': '20230706',
'thumbnail': 'https://cdn.banbye.com/video/v_kb6_o1Kyq-CD/96.webp',
'tags': ['Paryż', 'Francja', 'Polska', 'Imigranci', 'Morawiecki', 'Tusk'],
'like_count': int,
'dislike_count': int,
'view_count': int,
'comment_count': int,
},
}]
def _real_extract(self, url):

View File

@@ -2,11 +2,11 @@ import functools
import itertools
import json
import re
import urllib.error
import xml.etree.ElementTree
from .common import InfoExtractor
from ..compat import compat_HTTPError, compat_str, compat_urlparse
from ..compat import compat_str, compat_urlparse
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
OnDemandPagedList,
@@ -15,11 +15,13 @@ from ..utils import (
float_or_none,
get_element_by_class,
int_or_none,
join_nonempty,
js_to_json,
parse_duration,
parse_iso8601,
parse_qs,
strip_or_none,
traverse_obj,
try_get,
unescapeHTML,
unified_timestamp,
@@ -41,7 +43,6 @@ class BBCCoUkIE(InfoExtractor):
iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
music/(?:clips|audiovideo/popular)[/#]|
radio/player/|
sounds/play/|
events/[^/]+/play/[^/]+/
)
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
@@ -218,20 +219,6 @@ class BBCCoUkIE(InfoExtractor):
# rtmp download
'skip_download': True,
},
}, {
'url': 'https://www.bbc.co.uk/sounds/play/m0007jzb',
'note': 'Audio',
'info_dict': {
'id': 'm0007jz9',
'ext': 'mp4',
'title': 'BBC Proms, 2019, Prom 34: West–Eastern Divan Orchestra',
'description': "Live BBC Proms. West–Eastern Divan Orchestra with Daniel Barenboim and Martha Argerich.",
'duration': 9840,
},
'params': {
# rtmp download
'skip_download': True,
}
}, {
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
'only_matching': True,
@@ -277,7 +264,7 @@ class BBCCoUkIE(InfoExtractor):
post_url, None, 'Logging in', data=urlencode_postdata(login_form),
headers={'Referer': self._LOGIN_URL})
if self._LOGIN_URL in urlh.geturl():
if self._LOGIN_URL in urlh.url:
error = clean_html(get_element_by_class('form-message', response))
if error:
raise ExtractorError(
@@ -388,8 +375,8 @@ class BBCCoUkIE(InfoExtractor):
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
m3u8_id=format_id, fatal=False)
except ExtractorError as e:
if not (isinstance(e.exc_info[1], urllib.error.HTTPError)
and e.exc_info[1].code in (403, 404)):
if not (isinstance(e.exc_info[1], HTTPError)
and e.exc_info[1].status in (403, 404)):
raise
fmts = []
formats.extend(fmts)
@@ -472,7 +459,7 @@ class BBCCoUkIE(InfoExtractor):
return programme_id, title, description, duration, formats, subtitles
except ExtractorError as ee:
if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
if not (isinstance(ee.cause, HTTPError) and ee.cause.status == 404):
raise
# fallback to legacy playlist
@@ -844,6 +831,20 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'upload_date': '20190604',
'categories': ['Psychology'],
},
}, {
# BBC Sounds
'url': 'https://www.bbc.co.uk/sounds/play/m001q78b',
'info_dict': {
'id': 'm001q789',
'ext': 'mp4',
'title': 'The Night Tracks Mix - Music for the darkling hour',
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0c00hym.jpg',
'chapters': 'count:8',
'description': 'md5:815fb51cbdaa270040aab8145b3f1d67',
'uploader': 'Radio 3',
'duration': 1800,
'uploader_id': 'bbc_radio_three',
},
}, { # onion routes
'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576',
'only_matching': True,
@@ -983,7 +984,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
# Some playlist URL may fail with 500, at the same time
# the other one may work fine (e.g.
# http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
if isinstance(e.cause, HTTPError) and e.cause.status == 500:
continue
raise
if entry:
@@ -1128,6 +1129,13 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'uploader_id': network.get('id'),
'formats': formats,
'subtitles': subtitles,
'chapters': traverse_obj(preload_state, (
'tracklist', 'tracks', lambda _, v: float_or_none(v['offset']['start']), {
'title': ('titles', {lambda x: join_nonempty(
'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
'start_time': ('offset', 'start', {float_or_none}),
'end_time': ('offset', 'end', {float_or_none}),
})) or None,
}
bbc3_config = self._parse_json(
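
Note: the new 'chapters' key builds chapter titles with join_nonempty's from_dict mode, which looks each name up in the given mapping and drops empty parts before joining. A minimal sketch (field values invented):

from yt_dlp.utils import join_nonempty

titles = {'primary': 'Night Tracks', 'secondary': 'The darkling hour', 'tertiary': None}
assert join_nonempty('primary', 'secondary', 'tertiary',
                     delim=' - ', from_dict=titles) == 'Night Tracks - The darkling hour'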

View File

@@ -1,6 +1,7 @@
from .common import InfoExtractor
from ..utils import (
int_or_none,
traverse_obj,
unescapeHTML,
)
@@ -8,7 +9,8 @@ from ..utils import (
class BildIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bild\.de/(?:[^/]+/)+(?P<display_id>[^/]+)-(?P<id>\d+)(?:,auto=true)?\.bild\.html'
IE_DESC = 'Bild.de'
_TEST = {
_TESTS = [{
'note': 'static MP4 only',
'url': 'http://www.bild.de/video/clip/apple-ipad-air/das-koennen-die-neuen-ipads-38184146.bild.html',
'md5': 'dd495cbd99f2413502a1713a1156ac8a',
'info_dict': {
@@ -19,7 +21,19 @@ class BildIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 196,
}
}
}, {
'note': 'static MP4 and HLS',
'url': 'https://www.bild.de/video/clip/news-ausland/deftiger-abgang-vom-10m-turm-bademeister-sorgt-fuer-skandal-85158620.bild.html',
'md5': 'fb0ed4f09c495d4ba7ce2eee0bb90de1',
'info_dict': {
'id': '85158620',
'ext': 'mp4',
'title': 'Der Sprungturm-Skandal',
'description': 'md5:709b543c24dc31bbbffee73bccda34ad',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 69,
}
}]
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -27,11 +41,23 @@ class BildIE(InfoExtractor):
video_data = self._download_json(
url.split('.bild.html')[0] + ',view=json.bild.html', video_id)
formats = []
for src in traverse_obj(video_data, ('clipList', 0, 'srces', lambda _, v: v['src'])):
src_type = src.get('type')
if src_type == 'application/x-mpegURL':
formats.extend(
self._extract_m3u8_formats(
src['src'], video_id, 'mp4', m3u8_id='hls', fatal=False))
elif src_type == 'video/mp4':
formats.append({'url': src['src'], 'format_id': 'http-mp4'})
else:
self.report_warning(f'Skipping unsupported format type: "{src_type}"')
return {
'id': video_id,
'title': unescapeHTML(video_data['title']).strip(),
'description': unescapeHTML(video_data.get('description')),
'url': video_data['clipList'][0]['srces'][0]['src'],
'formats': formats,
'thumbnail': video_data.get('poster'),
'duration': int_or_none(video_data.get('durationSec')),
}
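A minimal sketch of the new source iteration, assuming a trimmed-down clipList payload; the traverse_obj filter drops entries without a src before the type branch runs:

from yt_dlp.utils import traverse_obj

video_data = {'clipList': [{'srces': [
    {'type': 'video/mp4', 'src': 'https://example.com/clip.mp4'},
    {'type': 'application/x-mpegURL', 'src': 'https://example.com/clip.m3u8'},
    {'type': 'video/webm'},  # no 'src' key: filtered out before the branch
]}]}
for src in traverse_obj(video_data, ('clipList', 0, 'srces', lambda _, v: v['src'])):
    print(src['type'], src['src'])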


@@ -3,21 +3,24 @@ import functools
import hashlib
import itertools
import math
import re
import time
import urllib.error
import urllib.parse
from .common import InfoExtractor, SearchInfoExtractor
from ..dependencies import Cryptodome
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
GeoRestrictedError,
InAdvancePagedList,
OnDemandPagedList,
bool_or_none,
filter_dict,
float_or_none,
format_field,
int_or_none,
join_nonempty,
make_archive_id,
merge_dicts,
mimetype2ext,
@@ -33,27 +36,31 @@ from ..utils import (
unsmuggle_url,
url_or_none,
urlencode_postdata,
variadic,
)
class BilibiliBaseIE(InfoExtractor):
_FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
def extract_formats(self, play_info):
format_names = {
r['quality']: traverse_obj(r, 'new_description', 'display_desc')
for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
}
audios = traverse_obj(play_info, ('dash', 'audio', ...))
audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict}))
flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
if flac_audio:
audios.append(flac_audio)
formats = [{
'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
'acodec': audio.get('codecs'),
'acodec': traverse_obj(audio, ('codecs', {str.lower})),
'vcodec': 'none',
'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
'filesize': int_or_none(audio.get('size'))
'filesize': int_or_none(audio.get('size')),
'format_id': str_or_none(audio.get('id')),
} for audio in audios]
formats.extend({
@@ -64,9 +71,13 @@ class BilibiliBaseIE(InfoExtractor):
'height': int_or_none(video.get('height')),
'vcodec': video.get('codecs'),
'acodec': 'none' if audios else None,
'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))),
'tbr': float_or_none(video.get('bandwidth'), scale=1000),
'filesize': int_or_none(video.get('size')),
'quality': int_or_none(video.get('id')),
'format_id': traverse_obj(
video, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}, 1),
('id', {str_or_none}), get_all=False),
'format': format_names.get(video.get('id')),
} for video in traverse_obj(play_info, ('dash', 'video', ...)))
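The (None, 'dolby') tuple in the audio traversal branches the path, collecting plain DASH audio and Dolby audio in one pass; a small sketch with an invented play_info:

from yt_dlp.utils import traverse_obj

play_info = {'dash': {
    'audio': [{'id': 30280, 'codecs': 'mp4a.40.2'}],
    'dolby': {'audio': [{'id': 30250, 'codecs': 'EC-3'}]},
}}
# None keeps the current object, 'dolby' descends into it - both branches merge
audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict}))
print([a['codecs'].lower() for a in audios])  # ['mp4a.40.2', 'ec-3'], cf. {str.lower} above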
@@ -135,9 +146,20 @@ class BilibiliBaseIE(InfoExtractor):
for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))):
yield from children
def _get_episodes_from_season(self, ss_id, url):
season_info = self._download_json(
'https://api.bilibili.com/pgc/web/season/section', ss_id,
note='Downloading season info', query={'season_id': ss_id},
headers={'Referer': url, **self.geo_verification_headers()})
for entry in traverse_obj(season_info, (
'result', 'main_section', 'episodes',
lambda _, v: url_or_none(v['share_url']) and v['id'])):
yield self.url_result(entry['share_url'], BiliBiliBangumiIE, f'ep{entry["id"]}')
class BiliBiliIE(BilibiliBaseIE):
_VALID_URL = r'https?://www\.bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.bilibili.com/video/BV13x41117TL',
@@ -233,7 +255,7 @@ class BiliBiliIE(BilibiliBaseIE):
'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
'duration': 313.557,
'upload_date': '20220709',
'uploader': '小夫Tech',
'uploader': '小夫太渴',
'timestamp': 1657347907,
'uploader_id': '1326814124',
'comment_count': int,
@@ -403,77 +425,94 @@ class BiliBiliIE(BilibiliBaseIE):
class BiliBiliBangumiIE(BilibiliBaseIE):
_VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/(?P<id>(?:ss|ep)\d+)'
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/(?P<id>ep\d+)'
_TESTS = [{
'url': 'https://www.bilibili.com/bangumi/play/ss897',
'url': 'https://www.bilibili.com/bangumi/play/ep267851',
'info_dict': {
'id': 'ss897',
'id': '267851',
'ext': 'mp4',
'series': '神的记事本',
'season': '神的记事本',
'season_id': 897,
'series': '鬼灭之刃',
'series_id': '4358',
'season': '鬼灭之刃',
'season_id': '26801',
'season_number': 1,
'episode': '你与旅行包',
'episode_number': 2,
'title': '神的记事本第2话 你与旅行包',
'duration': 1428.487,
'timestamp': 1310809380,
'upload_date': '20110716',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'episode': '残酷',
'episode_id': '267851',
'episode_number': 1,
'title': '1 残酷',
'duration': 1425.256,
'timestamp': 1554566400,
'upload_date': '20190406',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
},
}, {
'url': 'https://www.bilibili.com/bangumi/play/ep508406',
'only_matching': True,
'skip': 'According to the copyright owner\'s request, you may only watch the video after you become a premium member.'
}]
def _real_extract(self, url):
video_id = self._match_id(url)
episode_id = video_id[2:]
webpage = self._download_webpage(url, video_id)
if '您所在的地区无法观看本片' in webpage:
raise GeoRestrictedError('This video is restricted')
elif ('开通大会员观看' in webpage and '__playinfo__' not in webpage
or '正在观看预览,大会员免费看全片' in webpage):
elif '正在观看预览,大会员免费看全片' in webpage:
self.raise_login_required('This video is for premium members only')
play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']
headers = {'Referer': url, **self.geo_verification_headers()}
play_info = self._download_json(
'https://api.bilibili.com/pgc/player/web/v2/playurl', video_id,
'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
headers=headers)
premium_only = play_info.get('code') == -10403
play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}
formats = self.extract_formats(play_info)
if (not formats and '成为大会员抢先看' in webpage
and play_info.get('durl') and not play_info.get('dash')):
if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
self.raise_login_required('This video is for premium members only')
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
bangumi_info = self._download_json(
'https://api.bilibili.com/pgc/view/web/season', video_id, 'Get episode details',
query={'ep_id': episode_id}, headers=headers)['result']
season_id = traverse_obj(initial_state, ('mediaInfo', 'season_id'))
episode_number, episode_info = next((
(idx, ep) for idx, ep in enumerate(traverse_obj(
bangumi_info, ('episodes', ..., {dict})), 1)
if str_or_none(ep.get('id')) == episode_id), (1, {}))
season_id = bangumi_info.get('season_id')
season_number = season_id and next((
idx + 1 for idx, e in enumerate(
traverse_obj(initial_state, ('mediaInfo', 'seasons', ...)))
traverse_obj(bangumi_info, ('seasons', ...)))
if e.get('season_id') == season_id
), None)
aid = episode_info.get('aid')
return {
'id': video_id,
'formats': formats,
'title': traverse_obj(initial_state, 'h1Title'),
'episode': traverse_obj(initial_state, ('epInfo', 'long_title')),
'episode_number': int_or_none(traverse_obj(initial_state, ('epInfo', 'title'))),
'series': traverse_obj(initial_state, ('mediaInfo', 'series')),
'season': traverse_obj(initial_state, ('mediaInfo', 'season_title')),
'season_id': season_id,
**traverse_obj(bangumi_info, {
'series': ('series', 'series_title', {str}),
'series_id': ('series', 'series_id', {str_or_none}),
'thumbnail': ('square_cover', {url_or_none}),
}),
'title': join_nonempty('title', 'long_title', delim=' ', from_dict=episode_info),
'episode': episode_info.get('long_title'),
'episode_id': episode_id,
'episode_number': int_or_none(episode_info.get('title')) or episode_number,
'season_id': str_or_none(season_id),
'season_number': season_number,
'thumbnail': traverse_obj(initial_state, ('epInfo', 'cover')),
'timestamp': traverse_obj(initial_state, ('epInfo', 'pub_time')),
'timestamp': int_or_none(episode_info.get('pub_time')),
'duration': float_or_none(play_info.get('timelength'), scale=1000),
'subtitles': self.extract_subtitles(
video_id, initial_state, traverse_obj(initial_state, ('epInfo', 'cid'))),
'__post_extractor': self.extract_comments(traverse_obj(initial_state, ('epInfo', 'aid'))),
'http_headers': {'Referer': url, **self.geo_verification_headers()},
'subtitles': self.extract_subtitles(video_id, aid, episode_info.get('cid')),
'__post_extractor': self.extract_comments(aid),
'http_headers': headers,
}
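The episode lookup above relies on next() with a default to fall back gracefully when the episode id is missing from the season payload; a stdlib-only sketch of the pattern:

episodes = [{'id': 267851, 'title': '1'}, {'id': 267852, 'title': '2'}]  # invented
episode_id = '267852'
# enumerate(..., 1) yields 1-based positions; the (1, {}) default keeps
# episode_number and episode_info usable when nothing matches
episode_number, episode_info = next(
    ((idx, ep) for idx, ep in enumerate(episodes, 1) if str(ep['id']) == episode_id),
    (1, {}))
print(episode_number, episode_info)  # 2 {'id': 267852, 'title': '2'}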
class BiliBiliBangumiMediaIE(InfoExtractor):
_VALID_URL = r'https?://www\.bilibili\.com/bangumi/media/md(?P<id>\d+)'
class BiliBiliBangumiMediaIE(BilibiliBaseIE):
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.bilibili.com/bangumi/media/md24097891',
'info_dict': {
@@ -485,16 +524,26 @@ class BiliBiliBangumiMediaIE(InfoExtractor):
def _real_extract(self, url):
media_id = self._match_id(url)
webpage = self._download_webpage(url, media_id)
ss_id = self._search_json(
r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)['mediaInfo']['season_id']
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)
episode_list = self._download_json(
'https://api.bilibili.com/pgc/web/season/section', media_id,
query={'season_id': initial_state['mediaInfo']['season_id']},
note='Downloading season info')['result']['main_section']['episodes']
return self.playlist_result(self._get_episodes_from_season(ss_id, url), media_id)
return self.playlist_result((
self.url_result(entry['share_url'], BiliBiliBangumiIE, entry['aid'])
for entry in episode_list), media_id)
class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
_VALID_URL = r'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.bilibili.com/bangumi/play/ss26801',
'info_dict': {
'id': '26801'
},
'playlist_mincount': 26
}]
def _real_extract(self, url):
ss_id = self._match_id(url)
return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id)
class BilibiliSpaceBaseIE(InfoExtractor):
@@ -575,7 +624,7 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
playlist_id, note=f'Downloading page {page_idx}', query=query)
except ExtractorError as e:
if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 412:
if isinstance(e.cause, HTTPError) and e.cause.status == 412:
raise ExtractorError(
'Request is blocked by the server (412). Please add cookies, wait, and try again later.', expected=True)
raise
@@ -633,13 +682,35 @@ class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
return self.playlist_result(paged_list, playlist_id)
class BilibiliSpacePlaylistIE(BilibiliSpaceBaseIE):
_VALID_URL = r'https?://space.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail\?sid=(?P<sid>\d+)'
class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
for bvid in traverse_obj(page_data, (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})):
yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)
def _get_uploader(self, uid, playlist_id):
webpage = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', webpage, 'uploader', fatal=False)
def _extract_playlist(self, fetch_page, get_metadata, get_entries):
metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
metadata.pop('page_count', None)
metadata.pop('page_size', None)
return metadata, page_list
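A sketch of how _get_entries resolves bvid_keys: variadic() lets callers pass either a single key ('archives') or a path tuple (('data', 'list')) without special-casing:

from yt_dlp.utils import traverse_obj, variadic

page_data = {'data': {'list': [{'bvid': 'BV1xx411c7md'}]}}  # invented
bvid_keys = ('data', 'list')
# a bare string would be wrapped as ('archives',); a tuple passes through unchanged
path = (*variadic(bvid_keys, (str, bytes, dict, set)), ..., 'bvid', {str})
print(traverse_obj(page_data, path))  # ['BV1xx411c7md']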
class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
_VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
_TESTS = [{
'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
'info_dict': {
'id': '2142762_57445',
'title': '《底特律 变人》'
'title': '【完结】《底特律 变人》全结局流程解说',
'description': '',
'uploader': '老戴在此',
'uploader_id': '2142762',
'timestamp': int,
'upload_date': str,
'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
},
'playlist_mincount': 31,
}]
@@ -660,22 +731,251 @@ class BilibiliSpacePlaylistIE(BilibiliSpaceBaseIE):
return {
'page_count': math.ceil(entry_count / page_size),
'page_size': page_size,
'title': traverse_obj(page_data, ('meta', 'name'))
'uploader': self._get_uploader(mid, playlist_id),
**traverse_obj(page_data, {
'title': ('meta', 'name', {str}),
'description': ('meta', 'description', {str}),
'uploader_id': ('meta', 'mid', {str_or_none}),
'timestamp': ('meta', 'ptime', {int_or_none}),
'thumbnail': ('meta', 'cover', {url_or_none}),
})
}
def get_entries(page_data):
for entry in page_data.get('archives', []):
yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}',
BiliBiliIE, entry['bvid'])
return self._get_entries(page_data, 'archives')
metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
return self.playlist_result(paged_list, playlist_id, metadata['title'])
return self.playlist_result(paged_list, playlist_id, **metadata)
class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
_VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
_TESTS = [{
'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
'info_dict': {
'id': '1958703906_547718',
'title': '直播回放',
'description': '直播回放',
'uploader': '靡烟miya',
'uploader_id': '1958703906',
'timestamp': 1637985853,
'upload_date': '20211127',
'modified_timestamp': int,
'modified_date': str,
},
'playlist_mincount': 513,
}]
def _real_extract(self, url):
mid, sid = self._match_valid_url(url).group('mid', 'sid')
playlist_id = f'{mid}_{sid}'
playlist_meta = traverse_obj(self._download_json(
f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False
), {
'title': ('data', 'meta', 'name', {str}),
'description': ('data', 'meta', 'description', {str}),
'uploader_id': ('data', 'meta', 'mid', {str_or_none}),
'timestamp': ('data', 'meta', 'ctime', {int_or_none}),
'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}),
})
def fetch_page(page_idx):
return self._download_json(
'https://api.bilibili.com/x/series/archives',
playlist_id, note=f'Downloading page {page_idx}',
query={'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30})['data']
def get_metadata(page_data):
page_size = page_data['page']['size']
entry_count = page_data['page']['total']
return {
'page_count': math.ceil(entry_count / page_size),
'page_size': page_size,
'uploader': self._get_uploader(mid, playlist_id),
**playlist_meta
}
def get_entries(page_data):
return self._get_entries(page_data, 'archives')
metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
return self.playlist_result(paged_list, playlist_id, **metadata)
class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
_VALID_URL = r'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
_TESTS = [{
'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
'info_dict': {
'id': '1103407912',
'title': '【V2】',
'description': '',
'uploader': '晓月春日',
'uploader_id': '84912',
'timestamp': 1604905176,
'upload_date': '20201109',
'modified_timestamp': int,
'modified_date': str,
'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
'view_count': int,
'like_count': int,
},
'playlist_mincount': 22,
}, {
'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
'only_matching': True,
}]
def _real_extract(self, url):
fid = self._match_id(url)
list_info = self._download_json(
f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
fid, note='Downloading favlist metadata')
if list_info['code'] == -403:
self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')
entries = self._get_entries(self._download_json(
f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
fid, note='Downloading favlist entries'), 'data')
return self.playlist_result(entries, fid, **traverse_obj(list_info, ('data', 'info', {
'title': ('title', {str}),
'description': ('intro', {str}),
'uploader': ('upper', 'name', {str}),
'uploader_id': ('upper', 'mid', {str_or_none}),
'timestamp': ('ctime', {int_or_none}),
'modified_timestamp': ('mtime', {int_or_none}),
'thumbnail': ('cover', {url_or_none}),
'view_count': ('cnt_info', 'play', {int_or_none}),
'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
})))
class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
_TESTS = [{
'url': 'https://www.bilibili.com/watchlater/#/list',
'info_dict': {'id': 'watchlater'},
'playlist_mincount': 0,
'skip': 'login required',
}]
def _real_extract(self, url):
list_id = getattr(self._get_cookies(url).get('DedeUserID'), 'value', 'watchlater')
watchlater_info = self._download_json(
'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id)
if watchlater_info['code'] == -101:
self.raise_login_required(msg='You need to log in to access your watchlater list')
entries = self._get_entries(watchlater_info, ('data', 'list'))
return self.playlist_result(entries, id=list_id, title='稍后再看')
class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
_TESTS = [{
'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
'info_dict': {
'id': '5_547718',
'title': '直播回放',
'uploader': '靡烟miya',
'uploader_id': '1958703906',
'timestamp': 1637985853,
'upload_date': '20211127',
},
'playlist_mincount': 513,
}, {
'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
'info_dict': {
'id': '5_547718',
},
'playlist_mincount': 513,
'skip': 'redirect url',
}, {
'url': 'https://www.bilibili.com/list/ml1103407912',
'info_dict': {
'id': '3_1103407912',
'title': '【V2】',
'uploader': '晓月春日',
'uploader_id': '84912',
'timestamp': 1604905176,
'upload_date': '20201109',
'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
},
'playlist_mincount': 22,
}, {
'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
'info_dict': {
'id': '3_1103407912',
},
'playlist_mincount': 22,
'skip': 'redirect url',
}, {
'url': 'https://www.bilibili.com/list/watchlater',
'info_dict': {'id': 'watchlater'},
'playlist_mincount': 0,
'skip': 'login required',
}, {
'url': 'https://www.bilibili.com/medialist/play/watchlater',
'info_dict': {'id': 'watchlater'},
'playlist_mincount': 0,
'skip': 'login required',
}]
def _extract_medialist(self, query, list_id):
for page_num in itertools.count(1):
page_data = self._download_json(
'https://api.bilibili.com/x/v2/medialist/resource/list',
list_id, query=query, note=f'Downloading playlist {query["biz_id"]} page {page_num}'
)['data']
yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
if not page_data.get('has_more', False):
break
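The medialist endpoint pages by cursor rather than page number: each response's last item id becomes the next request's oid. A self-contained sketch with invented pages:

pages = [  # invented responses mimicking the has_more/oid cursor
    {'media_list': [{'id': 1, 'bv_id': 'BV1a'}, {'id': 2, 'bv_id': 'BV1b'}], 'has_more': True},
    {'media_list': [{'id': 3, 'bv_id': 'BV1c'}], 'has_more': False},
]
query = {}
for page_data in pages:
    for item in page_data['media_list']:
        print(item['bv_id'])
    query['oid'] = page_data['media_list'][-1]['id']  # cursor for the next request
    if not page_data.get('has_more', False):
        break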
def _real_extract(self, url):
list_id = self._match_id(url)
webpage = self._download_webpage(url, list_id)
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)
if traverse_obj(initial_state, ('error', 'code', {int_or_none})) != 200:
error_code = traverse_obj(initial_state, ('error', 'trueCode', {int_or_none}))
error_message = traverse_obj(initial_state, ('error', 'message', {str_or_none}))
if error_code == -400 and list_id == 'watchlater':
self.raise_login_required('You need to log in to access your watchlater playlist')
elif error_code == -403:
self.raise_login_required('This is a private playlist. You need to log in as its owner')
elif error_code == 11010:
raise ExtractorError('Playlist is no longer available', expected=True)
raise ExtractorError(f'Could not access playlist: {error_code} {error_message}')
query = {
'ps': 20,
'with_current': False,
**traverse_obj(initial_state, {
'type': ('playlist', 'type', {int_or_none}),
'biz_id': ('playlist', 'id', {int_or_none}),
'tid': ('tid', {int_or_none}),
'sort_field': ('sortFiled', {int_or_none}),
'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
})
}
metadata = {
'id': f'{query["type"]}_{query["biz_id"]}',
**traverse_obj(initial_state, ('mediaListInfo', {
'title': ('title', {str}),
'uploader': ('upper', 'name', {str}),
'uploader_id': ('upper', 'mid', {str_or_none}),
'timestamp': ('ctime', {int_or_none}),
'thumbnail': ('cover', {url_or_none}),
})),
}
return self.playlist_result(self._extract_medialist(query, list_id), **metadata)
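One non-obvious detail above: the desc value arrives as a bool but the API expects a lowercase string, hence the {bool_or_none}, {str_or_none}, {str.lower} chain. A sketch:

from yt_dlp.utils import bool_or_none, str_or_none, traverse_obj

initial_state = {'desc': True}  # invented
# True -> 'True' -> 'true'; non-bool values are dropped by bool_or_none
print(traverse_obj(initial_state, ('desc', {bool_or_none}, {str_or_none}, {str.lower})))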
class BilibiliCategoryIE(InfoExtractor):
IE_NAME = 'Bilibili category extractor'
_MAX_RESULTS = 1000000
_VALID_URL = r'https?://www\.bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
_TESTS = [{
'url': 'https://www.bilibili.com/v/kichiku/mad',
'info_dict': {
@@ -1360,7 +1660,7 @@ class BiliIntlSeriesIE(BiliIntlBaseIE):
class BiliLiveIE(InfoExtractor):
_VALID_URL = r'https?://live.bilibili.com/(?:blanc/)?(?P<id>\d+)'
_VALID_URL = r'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'
_TESTS = [{
'url': 'https://live.bilibili.com/196',


@@ -2,9 +2,9 @@ import functools
import re
from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import (
ExtractorError,
HEADRequest,
OnDemandPagedList,
clean_html,
get_element_by_class,


@@ -1,56 +1,170 @@
import functools
import re
from .common import InfoExtractor
from ..utils import (
clean_html,
extract_attributes,
get_element_text_and_html_by_tag,
get_elements_by_class,
join_nonempty,
js_to_json,
determine_ext,
mimetype2ext,
unified_strdate,
url_or_none,
urljoin,
variadic,
)
from ..utils.traversal import traverse_obj
def html_get_element(tag=None, cls=None):
assert tag or cls, 'One of tag or class is required'
if cls:
func = functools.partial(get_elements_by_class, cls, tag=tag)
else:
func = functools.partial(get_element_text_and_html_by_tag, tag)
def html_get_element_wrapper(html):
return variadic(func(html))[0]
return html_get_element_wrapper
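The helper above papers over the two utils' different return shapes: get_elements_by_class returns a list, while get_element_text_and_html_by_tag returns a (text, html) tuple; variadic(...)[0] picks the first element of either. A sketch of the tag branch:

from yt_dlp.utils import get_element_text_and_html_by_tag, variadic

html = '<main><h2>Joachim Gauck zu 1989</h2></main>'
# returns ('Joachim Gauck zu 1989', '<h2>...</h2>'); [0] keeps the text
print(variadic(get_element_text_and_html_by_tag('h2', html))[0])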
class BpbIE(InfoExtractor):
IE_DESC = 'Bundeszentrale für politische Bildung'
_VALID_URL = r'https?://(?:www\.)?bpb\.de/mediathek/(?P<id>[0-9]+)/'
_VALID_URL = r'https?://(?:www\.|m\.)?bpb\.de/(?:[^/?#]+/)*(?P<id>\d+)(?:[/?#]|$)'
_TEST = {
_TESTS = [{
'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
'md5': 'c4f84c8a8044ca9ff68bb8441d300b3f',
'info_dict': {
'id': '297',
'ext': 'mp4',
'creator': 'Kooperative Berlin',
'description': 'md5:f4f75885ba009d3e2b156247a8941ce6',
'release_date': '20160115',
'series': 'Interview auf dem Geschichtsforum 1989 | 2009',
'tags': ['Friedliche Revolution', 'Erinnerungskultur', 'Vergangenheitspolitik', 'DDR 1949 - 1990', 'Freiheitsrecht', 'BStU', 'Deutschland'],
'thumbnail': 'https://www.bpb.de/cache/images/7/297_teaser_16x9_1240.jpg?8839D',
'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR',
'description': 'Joachim Gauck, erster Beauftragter für die Stasi-Unterlagen, spricht auf dem Geschichtsforum über die friedliche Revolution 1989 und eine "gewisse Traurigkeit" im Umgang mit der DDR-Vergangenheit.'
'uploader': 'Bundeszentrale für politische Bildung',
},
}, {
'url': 'https://www.bpb.de/mediathek/video/522184/krieg-flucht-und-falschmeldungen-wirstattdesinformation-2/',
'info_dict': {
'id': '522184',
'ext': 'mp4',
'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)',
'description': 'md5:f83c795ff8f825a69456a9e51fc15903',
'release_date': '20230621',
'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'],
'thumbnail': 'https://www.bpb.de/cache/images/4/522184_teaser_16x9_1240.png?EABFB',
'title': 'md5:9b01ccdbf58dbf9e5c9f6e771a803b1c',
'uploader': 'Bundeszentrale für politische Bildung',
},
}, {
'url': 'https://www.bpb.de/lernen/bewegtbild-und-politische-bildung/webvideo/518789/krieg-flucht-und-falschmeldungen-wirstattdesinformation-1/',
'info_dict': {
'id': '518789',
'ext': 'mp4',
'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)',
'description': 'md5:85228aed433e84ff0ff9bc582abd4ea8',
'release_date': '20230302',
'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'],
'thumbnail': 'https://www.bpb.de/cache/images/9/518789_teaser_16x9_1240.jpeg?56D0D',
'title': 'md5:3e956f264bb501f6383f10495a401da4',
'uploader': 'Bundeszentrale für politische Bildung',
},
}, {
'url': 'https://www.bpb.de/mediathek/podcasts/apuz-podcast/539727/apuz-20-china/',
'only_matching': True,
}, {
'url': 'https://www.bpb.de/mediathek/audio/315813/folge-1-eine-einfuehrung/',
'info_dict': {
'id': '315813',
'ext': 'mp3',
'creator': 'Axel Schröder',
'description': 'md5:eda9d1af34e5912efef5baf54fba4427',
'release_date': '20200921',
'series': 'Auf Endlagersuche. Der deutsche Weg zu einem sicheren Atommülllager',
'tags': ['Atomenergie', 'Endlager', 'hoch-radioaktiver Abfall', 'Endlagersuche', 'Atommüll', 'Atomendlager', 'Gorleben', 'Deutschland'],
'thumbnail': 'https://www.bpb.de/cache/images/3/315813_teaser_16x9_1240.png?92A94',
'title': 'Folge 1: Eine Einführung',
'uploader': 'Bundeszentrale für politische Bildung',
},
}, {
'url': 'https://www.bpb.de/517806/die-weltanschauung-der-neuen-rechten/',
'info_dict': {
'id': '517806',
'ext': 'mp3',
'creator': 'Bundeszentrale für politische Bildung',
'description': 'md5:594689600e919912aade0b2871cc3fed',
'release_date': '20230127',
'series': 'Vorträge des Fachtags "Modernisierer. Grenzgänger. Anstifter. Sechs Jahrzehnte \'Neue Rechte\'"',
'tags': ['Rechtsextremismus', 'Konservatismus', 'Konservativismus', 'neue Rechte', 'Rechtspopulismus', 'Schnellroda', 'Deutschland'],
'thumbnail': 'https://www.bpb.de/cache/images/6/517806_teaser_16x9_1240.png?7A7A0',
'title': 'Die Weltanschauung der "Neuen Rechten"',
'uploader': 'Bundeszentrale für politische Bildung',
},
}, {
'url': 'https://www.bpb.de/mediathek/reihen/zahlen-und-fakten-soziale-situation-filme/520153/zahlen-und-fakten-die-soziale-situation-in-deutschland-migration/',
'only_matching': True,
}]
_TITLE_RE = re.compile('(?P<title>[^<]*)<[^>]+>(?P<series>[^<]*)')
def _parse_vue_attributes(self, name, string, video_id):
attributes = extract_attributes(self._search_regex(rf'(<{name}(?:"[^"]*?"|[^>])*>)', string, name))
for key, value in attributes.items():
if key.startswith(':'):
attributes[key] = self._parse_json(value, video_id, transform_source=js_to_json, fatal=False)
return attributes
@staticmethod
def _process_source(source):
url = url_or_none(source['src'])
if not url:
return None
source_type = source.get('type', '')
extension = mimetype2ext(source_type)
is_video = source_type.startswith('video')
note = url.rpartition('.')[0].rpartition('_')[2] if is_video else None
return {
'url': url,
'ext': extension,
'vcodec': None if is_video else 'none',
'quality': 10 if note == 'high' else 0,
'format_note': note,
'format_id': join_nonempty(extension, note),
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(
r'<h2 class="white">(.*?)</h2>', webpage, 'title')
video_info_dicts = re.findall(
r"({\s*src\s*:\s*'https?://film\.bpb\.de/[^}]+})", webpage)
formats = []
for video_info in video_info_dicts:
video_info = self._parse_json(
video_info, video_id, transform_source=js_to_json, fatal=False)
if not video_info:
continue
video_url = video_info.get('src')
if not video_url:
continue
quality = 'high' if '_high' in video_url else 'low'
formats.append({
'url': video_url,
'quality': 10 if quality == 'high' else 0,
'format_note': quality,
'format_id': '%s-%s' % (quality, determine_ext(video_url)),
})
title_result = traverse_obj(webpage, ({html_get_element(cls='opening-header__title')}, {self._TITLE_RE.match}))
json_lds = list(self._yield_json_ld(webpage, video_id, fatal=False))
return {
'id': video_id,
'formats': formats,
'title': title,
'description': self._og_search_description(webpage),
'title': traverse_obj(title_result, ('title', {str.strip})) or None,
# This metadata could be interpreted otherwise, but "series" is the best fit
'series': traverse_obj(title_result, ('series', {str.strip})) or None,
'description': join_nonempty(*traverse_obj(webpage, [(
{html_get_element(cls='opening-intro')},
[{html_get_element(tag='bpb-accordion-item')}, {html_get_element(cls='text-content')}],
), {clean_html}]), delim='\n\n') or None,
'creator': self._html_search_meta('author', webpage),
'uploader': self._html_search_meta('publisher', webpage),
'release_date': unified_strdate(self._html_search_meta('date', webpage)),
'tags': traverse_obj(json_lds, (..., 'keywords', {lambda x: x.split(',')}, ...)),
**traverse_obj(self._parse_vue_attributes('bpb-player', webpage, video_id), {
'formats': (':sources', ..., {self._process_source}),
'thumbnail': ('poster', {lambda x: urljoin(url, x)}),
}),
}
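A self-contained sketch of the Vue-attribute parsing idea behind _parse_vue_attributes, with invented markup; Vue-bound attributes (leading colon) carry JS literals that js_to_json turns into parseable JSON:

import json
import re

from yt_dlp.utils import extract_attributes, js_to_json

webpage = '<bpb-player :sources="[{src: \'https://example.com/v_high.mp4\', type: \'video/mp4\'}]" poster="/img.jpg">'
attributes = extract_attributes(re.search(r'(<bpb-player(?:"[^"]*?"|[^>])*>)', webpage).group(1))
for key, value in attributes.items():
    if key.startswith(':'):  # Vue binding: the value is a JS literal, not plain text
        attributes[key] = json.loads(js_to_json(value))
print(attributes[':sources'][0]['src'], attributes['poster'])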


@@ -1,6 +1,6 @@
from .adobepass import AdobePassIE
from ..networking import HEADRequest
from ..utils import (
HEADRequest,
extract_attributes,
float_or_none,
get_element_html_by_class,
@@ -155,7 +155,7 @@ class BravoTVIE(AdobePassIE):
chapters = None
m3u8_url = self._request_webpage(HEADRequest(
update_url_query(f'{tp_url}/stream.m3u8', query)), video_id, 'Checking m3u8 URL').geturl()
update_url_query(f'{tp_url}/stream.m3u8', query)), video_id, 'Checking m3u8 URL').url
if 'mpeg_cenc' in m3u8_url:
self.report_drm(video_id)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')


@@ -7,10 +7,10 @@ from .adobepass import AdobePassIE
from .common import InfoExtractor
from ..compat import (
compat_etree_fromstring,
compat_HTTPError,
compat_parse_qs,
compat_urlparse,
)
from ..networking.exceptions import HTTPError
from ..utils import (
clean_html,
dict_get,
@@ -915,8 +915,8 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
json_data = self._download_json(api_url, video_id, headers=headers)
break
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403):
json_data = self._parse_json(e.cause.response.read().decode(), video_id)[0]
message = json_data.get('message') or json_data['error_code']
if json_data.get('error_subcode') == 'CLIENT_GEO':
self.raise_geo_restricted(msg=message)


@@ -0,0 +1,127 @@
import hashlib
from .common import InfoExtractor
from ..utils import (
ExtractorError,
traverse_obj,
urlencode_postdata,
)
class BrilliantpalaBaseIE(InfoExtractor):
_NETRC_MACHINE = 'brilliantpala'
_DOMAIN = '{subdomain}.brilliantpala.org'
def _initialize_pre_login(self):
self._HOMEPAGE = f'https://{self._DOMAIN}'
self._LOGIN_API = f'{self._HOMEPAGE}/login/'
self._LOGOUT_DEVICES_API = f'{self._HOMEPAGE}/logout_devices/?next=/'
self._CONTENT_API = f'{self._HOMEPAGE}/api/v2.4/contents/{{content_id}}/'
self._HLS_AES_URI = f'{self._HOMEPAGE}/api/v2.5/video_contents/{{content_id}}/key/'
def _get_logged_in_username(self, url, video_id):
webpage, urlh = self._download_webpage_handle(url, video_id)
if self._LOGIN_API == urlh.url:
self.raise_login_required()
return self._html_search_regex(
r'"username"\s*:\s*"(?P<username>[^"]+)"', webpage, 'stream page info', 'username')
def _perform_login(self, username, password):
login_form = self._hidden_inputs(self._download_webpage(
self._LOGIN_API, None, 'Downloading login page'))
login_form.update({
'username': username,
'password': password,
})
self._set_cookie(self._DOMAIN, 'csrftoken', login_form['csrfmiddlewaretoken'])
logged_page = self._download_webpage(
self._LOGIN_API, None, note='Logging in', headers={'Referer': self._LOGIN_API},
data=urlencode_postdata(login_form))
if self._html_search_regex(
r'(Your username / email and password)', logged_page, 'auth fail', default=None):
raise ExtractorError('Wrong username or password', expected=True)
# only one active login is allowed at a time
if self._html_search_regex(
r'(Logout Other Devices)', logged_page, 'logout devices button', default=None):
logout_device_form = self._hidden_inputs(logged_page)
self._download_webpage(
self._LOGOUT_DEVICES_API, None, headers={'Referer': self._LOGIN_API},
note='Logging out other devices', data=urlencode_postdata(logout_device_form))
def _real_extract(self, url):
course_id, content_id = self._match_valid_url(url).group('course_id', 'content_id')
video_id = f'{course_id}-{content_id}'
username = self._get_logged_in_username(url, video_id)
content_json = self._download_json(
self._CONTENT_API.format(content_id=content_id), video_id,
note='Fetching content info', errnote='Unable to fetch content info')
entries = []
for stream in traverse_obj(content_json, ('video', 'streams', lambda _, v: v['id'] and v['url'])):
formats = self._extract_m3u8_formats(stream['url'], video_id, fatal=False)
if not formats:
continue
entries.append({
'id': str(stream['id']),
'title': content_json.get('title'),
'formats': formats,
'hls_aes': {'uri': self._HLS_AES_URI.format(content_id=content_id)},
'http_headers': {'X-Key': hashlib.sha256(username.encode('ascii')).hexdigest()},
'thumbnail': content_json.get('cover_image'),
})
return self.playlist_result(
entries, playlist_id=video_id, playlist_title=content_json.get('title'))
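The X-Key header above is simply a SHA-256 of the logged-in username; a one-liner sketch with a hypothetical account:

import hashlib

username = 'student@example.com'  # hypothetical
print(hashlib.sha256(username.encode('ascii')).hexdigest())  # value sent as X-Key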
class BrilliantpalaElearnIE(BrilliantpalaBaseIE):
IE_NAME = 'Brilliantpala:Elearn'
IE_DESC = 'VoD on elearn.brilliantpala.org'
_VALID_URL = r'https?://elearn\.brilliantpala\.org/courses/(?P<course_id>\d+)/contents/(?P<content_id>\d+)/?'
_TESTS = [{
'url': 'https://elearn.brilliantpala.org/courses/42/contents/12345/',
'only_matching': True,
}, {
'url': 'https://elearn.brilliantpala.org/courses/98/contents/36683/',
'info_dict': {
'id': '23577',
'ext': 'mp4',
'title': 'Physical World, Units and Measurements - 1',
'thumbnail': 'https://d1j3vi2u94ebt0.cloudfront.net/institute/brilliantpalalms/chapter_contents/26237/e657f81b90874be19795c7ea081f8d5c.png',
'live_status': 'not_live',
},
'params': {
'skip_download': True,
},
}]
_DOMAIN = BrilliantpalaBaseIE._DOMAIN.format(subdomain='elearn')
class BrilliantpalaClassesIE(BrilliantpalaBaseIE):
IE_NAME = 'Brilliantpala:Classes'
IE_DESC = 'VoD on classes.brilliantpala.org'
_VALID_URL = r'https?://classes\.brilliantpala\.org/courses/(?P<course_id>\d+)/contents/(?P<content_id>\d+)/?'
_TESTS = [{
'url': 'https://classes.brilliantpala.org/courses/42/contents/12345/',
'only_matching': True,
}, {
'url': 'https://classes.brilliantpala.org/courses/416/contents/25445/',
'info_dict': {
'id': '9128',
'ext': 'mp4',
'title': 'Motion in a Straight Line - Class 1',
'thumbnail': 'https://d3e4y8hquds3ek.cloudfront.net/institute/brilliantpalaelearn/chapter_contents/ff5ba838d0ec43419f67387fe1a01fa8.png',
'live_status': 'not_live',
},
'params': {
'skip_download': True,
},
}]
_DOMAIN = BrilliantpalaBaseIE._DOMAIN.format(subdomain='classes')


@@ -0,0 +1,39 @@
from .common import InfoExtractor
class Canal1IE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.|noticias\.)?canal1\.com\.co/(?:[^?#&])+/(?P<id>[\w-]+)'
_TESTS = [{
'url': 'https://canal1.com.co/noticias/napa-i-una-cadena-de-produccion-de-arroz-que-se-quedo-en-veremos-y-abandonada-en-el-departamento-del-choco/',
'info_dict': {
'id': '63b39f6b354977084b85ab54',
'display_id': 'napa-i-una-cadena-de-produccion-de-arroz-que-se-quedo-en-veremos-y-abandonada-en-el-departamento-del-choco',
'title': 'Ñapa I Una cadena de producción de arroz que se quedó en veremos y abandonada en el departamento del Chocó',
'description': 'md5:bc49c6d64d20610ea1e7daf079a0d013',
'thumbnail': r're:^https?://[^?#]+63b39f6b354977084b85ab54',
'ext': 'mp4',
},
}, {
'url': 'https://noticias.canal1.com.co/noticias/tres-i-el-triste-record-que-impuso-elon-musk-el-dueno-de-tesla-y-de-twitter/',
'info_dict': {
'id': '63b39e93f5fd223aa32250fb',
'display_id': 'tres-i-el-triste-record-que-impuso-elon-musk-el-dueno-de-tesla-y-de-twitter',
'title': 'Tres I El triste récord que impuso Elon Musk, el dueño de Tesla y de Twitter',
'description': 'md5:d9f691f131a21ce6767ca6c05d17d791',
'thumbnail': r're:^https?://[^?#]+63b39e93f5fd223aa32250fb',
'ext': 'mp4',
},
}, {
# Geo-restricted to Colombia
'url': 'https://canal1.com.co/programas/guerreros-canal-1/video-inedito-guerreros-despedida-kewin-zarate/',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
return self.url_result(
self._search_regex(r'"embedUrl"\s*:\s*"([^"]+)', webpage, 'embed url'),
display_id=display_id, url_transparent=True)


@@ -64,7 +64,7 @@ class CanalplusIE(InfoExtractor):
# response = self._request_webpage(
# HEADRequest(fmt_url), video_id,
# 'Checking if the video is georestricted')
# if '/blocage' in response.geturl():
# if '/blocage' in response.url:
# raise ExtractorError(
# 'The video is not available in your country',
# expected=True)


@@ -0,0 +1,136 @@
import base64
import json
import uuid
from .common import InfoExtractor
from ..utils import (
int_or_none,
js_to_json,
traverse_obj,
urljoin,
)
class CaracolTvPlayIE(InfoExtractor):
_VALID_URL = r'https?://play\.caracoltv\.com/videoDetails/(?P<id>[^/?#]+)'
_NETRC_MACHINE = 'caracoltv-play'
_TESTS = [{
'url': 'https://play.caracoltv.com/videoDetails/OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==',
'info_dict': {
'id': 'OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==',
'title': 'La teoría del promedio',
'description': 'md5:1cdd6d2c13f19ef0d9649ab81a023ac3',
},
'playlist_count': 6,
}, {
'url': 'https://play.caracoltv.com/videoDetails/OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==/ella?season=0',
'info_dict': {
'id': 'OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==',
'title': 'Ella',
'description': 'md5:a639b1feb5ddcc0cff92a489b4e544b8',
},
'playlist_count': 10,
}, {
'url': 'https://play.caracoltv.com/videoDetails/OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==/la-vuelta-al-mundo-en-80-risas-2022?season=0',
'info_dict': {
'id': 'OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==',
'title': 'La vuelta al mundo en 80 risas 2022',
'description': 'md5:e97aac36106e5c37ebf947b3350106a4',
},
'playlist_count': 17,
}, {
'url': 'https://play.caracoltv.com/videoDetails/MzoxX3BwbjRmNjB1',
'only_matching': True,
}]
_USER_TOKEN = None
def _extract_app_token(self, webpage):
config_js_path = self._search_regex(
r'<script[^>]+src\s*=\s*"([^"]+coreConfig.js[^"]+)', webpage, 'config js url', fatal=False)
mediation_config = {} if not config_js_path else self._search_json(
r'mediation\s*:', self._download_webpage(
urljoin('https://play.caracoltv.com/', config_js_path), None, fatal=False, note='Extracting JS config'),
'mediation_config', None, transform_source=js_to_json, fatal=False)
key = traverse_obj(
mediation_config, ('live', 'key')) or '795cd9c089a1fc48094524a5eba85a3fca1331817c802f601735907c8bbb4f50'
secret = traverse_obj(
mediation_config, ('live', 'secret')) or '64dec00a6989ba83d087621465b5e5d38bdac22033b0613b659c442c78976fa0'
return base64.b64encode(f'{key}:{secret}'.encode()).decode()
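The app token is plain HTTP Basic material: base64 of 'key:secret', with the fallback constants above used when coreConfig.js cannot be fetched. A sketch with dummy values:

import base64

key, secret = 'key', 'secret'  # dummies; real values come from coreConfig.js
print(base64.b64encode(f'{key}:{secret}'.encode()).decode())  # a2V5OnNlY3JldA==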
def _perform_login(self, email, password):
webpage = self._download_webpage('https://play.caracoltv.com/', None, fatal=False)
app_token = self._extract_app_token(webpage)
bearer_token = self._download_json(
'https://eu-gateway.inmobly.com/applications/oauth', None, data=b'', note='Retrieving bearer token',
headers={'Authorization': f'Basic {app_token}'})['token']
self._USER_TOKEN = self._download_json(
'https://eu-gateway.inmobly.com/user/login', None, note='Performing login', headers={
'Content-Type': 'application/json',
'Authorization': f'Bearer {bearer_token}',
}, data=json.dumps({
'device_data': {
'device_id': str(uuid.uuid4()),
'device_token': '',
'device_type': 'web'
},
'login_data': {
'enabled': True,
'email': email,
'password': password,
}
}).encode())['user_token']
def _extract_video(self, video_data, series_id=None, season_id=None, season_number=None):
formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_data['stream_url'], series_id, 'mp4')
return {
'id': video_data['id'],
'title': video_data.get('name'),
'description': video_data.get('description'),
'formats': formats,
'subtitles': subtitles,
'thumbnails': traverse_obj(
video_data, ('extra_thumbs', ..., {'url': 'thumb_url', 'height': 'height', 'width': 'width'})),
'series_id': series_id,
'season_id': season_id,
'season_number': int_or_none(season_number),
'episode_number': int_or_none(video_data.get('item_order')),
'is_live': video_data.get('entry_type') == 3,
}
def _extract_series_seasons(self, seasons, series_id):
for season in seasons:
api_response = self._download_json(
'https://eu-gateway.inmobly.com/feed', series_id, query={'season_id': season['id']},
headers={'Authorization': f'Bearer {self._USER_TOKEN}'})
season_number = season.get('order')
for episode in api_response['items']:
yield self._extract_video(episode, series_id, season['id'], season_number)
def _real_extract(self, url):
series_id = self._match_id(url)
if self._USER_TOKEN is None:
self._perform_login('guest@inmobly.com', 'Test@gus1')
api_response = self._download_json(
'https://eu-gateway.inmobly.com/feed', series_id, query={'include_ids': series_id},
headers={'Authorization': f'Bearer {self._USER_TOKEN}'})['items'][0]
if not api_response.get('seasons'):
return self._extract_video(api_response)
return self.playlist_result(
self._extract_series_seasons(api_response['seasons'], series_id),
series_id, **traverse_obj(api_response, {
'title': 'name',
'description': 'description',
}))


@@ -2,6 +2,7 @@ import re
import json
import base64
import time
import urllib.parse
from .common import InfoExtractor
from ..compat import (
@@ -65,6 +66,7 @@ class CBCIE(InfoExtractor):
'uploader': 'CBCC-NEW',
'timestamp': 255977160,
},
'skip': '404 Not Found',
}, {
# multiple iframes
'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot',
@@ -96,7 +98,7 @@ class CBCIE(InfoExtractor):
# multiple CBC.APP.Caffeine.initInstance(...)
'url': 'http://www.cbc.ca/news/canada/calgary/dog-indoor-exercise-winter-1.3928238',
'info_dict': {
'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks',
'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks', # FIXME
'id': 'dog-indoor-exercise-winter-1.3928238',
'description': 'md5:c18552e41726ee95bd75210d1ca9194c',
},
@@ -161,7 +163,7 @@ class CBCPlayerIE(InfoExtractor):
'upload_date': '20160210',
'uploader': 'CBCC-NEW',
},
'skip': 'Geo-restricted to Canada',
'skip': 'Geo-restricted to Canada and no longer available',
}, {
# Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
'url': 'http://www.cbc.ca/player/play/2657631896',
@@ -174,6 +176,9 @@ class CBCPlayerIE(InfoExtractor):
'timestamp': 1425704400,
'upload_date': '20150307',
'uploader': 'CBCC-NEW',
'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg',
'chapters': [],
'duration': 494.811,
},
}, {
'url': 'http://www.cbc.ca/player/play/2164402062',
@@ -186,6 +191,28 @@ class CBCPlayerIE(InfoExtractor):
'timestamp': 1320410746,
'upload_date': '20111104',
'uploader': 'CBCC-NEW',
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg',
'chapters': [],
'duration': 186.867,
},
}, {
# Has subtitles
# These broadcasts expire after ~1 month; a new test URL can be found at:
# https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast
'url': 'http://www.cbc.ca/player/play/2249992771553',
'md5': '2f2fb675dd4f0f8a5bb7588d1b13bacd',
'info_dict': {
'id': '2249992771553',
'ext': 'mp4',
'title': 'The National | Womens soccer pay, Florida seawater, Swift quake',
'description': 'md5:adba28011a56cfa47a080ff198dad27a',
'timestamp': 1690596000,
'duration': 2716.333,
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/481/326/thumbnail.jpeg',
'uploader': 'CBCC-NEW',
'chapters': 'count:5',
'upload_date': '20230729',
},
}]
@@ -199,9 +226,42 @@ class CBCPlayerIE(InfoExtractor):
'force_smil_url': True
}),
'id': video_id,
'_format_sort_fields': ('res', 'proto') # Prioritize direct http formats over HLS
}
class CBCPlayerPlaylistIE(InfoExtractor):
IE_NAME = 'cbc.ca:player:playlist'
_VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?:player/)(?!play/)(?P<id>[^?#]+)'
_TESTS = [{
'url': 'https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast',
'playlist_mincount': 25,
'info_dict': {
'id': 'news/tv shows/the national/latest broadcast',
}
}, {
'url': 'https://www.cbc.ca/player/news/Canada/North',
'playlist_mincount': 25,
'info_dict': {
'id': 'news/canada/north',
}
}]
def _real_extract(self, url):
playlist_id = urllib.parse.unquote(self._match_id(url)).lower()
webpage = self._download_webpage(url, playlist_id)
json_content = self._search_json(
r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', playlist_id)
def entries():
for video_id in traverse_obj(json_content, (
'video', 'clipsByCategory', lambda k, _: k.lower() == playlist_id, 'items', ..., 'id'
)):
yield self.url_result(f'https://www.cbc.ca/player/play/{video_id}', CBCPlayerIE)
return self.playlist_result(entries(), playlist_id)
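The clipsByCategory lookup is matched case-insensitively via a callable key in traverse_obj; a sketch with an invented initial state:

from yt_dlp.utils import traverse_obj

json_content = {'video': {'clipsByCategory': {
    'News/Canada/North': {'items': [{'id': '2657631896'}]},
}}}
playlist_id = 'news/canada/north'
print(traverse_obj(json_content, (
    'video', 'clipsByCategory', lambda k, _: k.lower() == playlist_id, 'items', ..., 'id')))
# ['2657631896']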
class CBCGemIE(InfoExtractor):
IE_NAME = 'gem.cbc.ca'
_VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>[0-9a-z-]+/s[0-9]+[a-z][0-9]+)'
@@ -280,12 +340,12 @@ class CBCGemIE(InfoExtractor):
data = json.dumps({'jwt': sig}).encode()
headers = {'content-type': 'application/json', 'ott-device-type': 'web'}
resp = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/token',
None, data=data, headers=headers)
None, data=data, headers=headers, expected_status=426)
cbc_access_token = resp['accessToken']
headers = {'content-type': 'application/json', 'ott-device-type': 'web', 'ott-access-token': cbc_access_token}
resp = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/profile',
None, headers=headers)
None, headers=headers, expected_status=426)
return resp['claimsToken']
def _get_claims_token_expiry(self):
@@ -417,6 +477,10 @@ class CBCGemPlaylistIE(InfoExtractor):
'id': 'schitts-creek/s06',
'title': 'Season 6',
'description': 'md5:6a92104a56cbeb5818cc47884d4326a2',
'series': 'Schitt\'s Creek',
'season_number': 6,
'season': 'Season 6',
'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/season/perso/cbc_schitts_creek_season_06_carousel_v03.jpg?impolicy=ott&im=Resize=(_Size_)&quality=75',
},
}, {
'url': 'https://gem.cbc.ca/schitts-creek/s06',


@@ -101,6 +101,7 @@ class CBSIE(CBSBaseIE):
# m3u8 download
'skip_download': True,
},
'skip': 'Subscription required',
}, {
'url': 'https://www.cbs.com/shows/video/sZH1MGgomIosZgxGJ1l263MFq16oMtW1/',
'info_dict': {
@@ -117,6 +118,7 @@ class CBSIE(CBSBaseIE):
},
'expected_warnings': [
'This content expired on', 'No video formats found', 'Requested format is not available'],
'skip': '404 Not Found',
}, {
'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/',
'only_matching': True,


@@ -7,9 +7,9 @@ import zlib
from .anvato import AnvatoIE
from .common import InfoExtractor
from .paramountplus import ParamountPlusIE
from ..networking import HEADRequest
from ..utils import (
ExtractorError,
HEADRequest,
UserNotLive,
determine_ext,
float_or_none,


@@ -90,10 +90,17 @@ class CCCPlaylistIE(InfoExtractor):
'id': '30c3',
},
'playlist_count': 135,
}, {
'url': 'https://media.ccc.de/c/DS2023',
'info_dict': {
'title': 'Datenspuren 2023',
'id': 'DS2023',
},
'playlist_count': 37
}]
def _real_extract(self, url):
playlist_id = self._match_id(url).lower()
playlist_id = self._match_id(url)
conf = self._download_json(
'https://media.ccc.de/public/conferences/' + playlist_id,


@@ -1,20 +1,20 @@
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_unquote,
compat_urllib_parse_urlparse,
)
from ..compat import compat_urllib_parse_unquote, compat_urllib_parse_urlparse
from ..networking import Request
from ..utils import (
ExtractorError,
float_or_none,
sanitized_Request,
str_or_none,
traverse_obj,
urlencode_postdata,
USER_AGENTS,
)
USER_AGENTS = {
'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}
class CeskaTelevizeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
@@ -97,7 +97,7 @@ class CeskaTelevizeIE(InfoExtractor):
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage, urlh = self._download_webpage_handle(url, playlist_id)
parsed_url = compat_urllib_parse_urlparse(urlh.geturl())
parsed_url = compat_urllib_parse_urlparse(urlh.url)
site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize')
playlist_title = self._og_search_title(webpage, default=None)
if site_name and playlist_title:
@@ -163,16 +163,16 @@ class CeskaTelevizeIE(InfoExtractor):
entries = []
for user_agent in (None, USER_AGENTS['Safari']):
req = sanitized_Request(
req = Request(
'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/',
data=urlencode_postdata(data))
req.add_header('Content-type', 'application/x-www-form-urlencoded')
req.add_header('x-addr', '127.0.0.1')
req.add_header('X-Requested-With', 'XMLHttpRequest')
req.headers['Content-type'] = 'application/x-www-form-urlencoded'
req.headers['x-addr'] = '127.0.0.1'
req.headers['X-Requested-With'] = 'XMLHttpRequest'
if user_agent:
req.add_header('User-Agent', user_agent)
req.add_header('Referer', url)
req.headers['User-Agent'] = user_agent
req.headers['Referer'] = url
playlistpage = self._download_json(req, playlist_id, fatal=False)
@@ -183,8 +183,8 @@ class CeskaTelevizeIE(InfoExtractor):
if playlist_url == 'error_region':
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
req.add_header('Referer', url)
req = Request(compat_urllib_parse_unquote(playlist_url))
req.headers['Referer'] = url
playlist = self._download_json(req, playlist_id, fatal=False)
if not playlist:
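The add_header() calls are replaced by plain item assignment on the new networking Request's headers mapping; a minimal sketch (request body is a placeholder):

from yt_dlp.networking import Request

req = Request('https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/',
              data=b'playlist=placeholder')
req.headers['Content-type'] = 'application/x-www-form-urlencoded'
req.headers['x-addr'] = '127.0.0.1'
print(req.headers)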


@@ -1,6 +1,6 @@
import json
import urllib.error
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
float_or_none,
@@ -40,7 +40,7 @@ class CinetecaMilanoIE(InfoExtractor):
'Authorization': try_get(self._get_cookies('https://www.cinetecamilano.it'), lambda x: f'Bearer {x["cnt-token"].value}') or ''
})
except ExtractorError as e:
if ((isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 500)
if ((isinstance(e.cause, HTTPError) and e.cause.status == 500)
or isinstance(e.cause, json.JSONDecodeError)):
self.raise_login_required(method='cookies')
raise


@@ -0,0 +1,136 @@
import re
from .common import InfoExtractor
from ..utils import (
filter_dict,
int_or_none,
parse_age_limit,
smuggle_url,
traverse_obj,
unsmuggle_url,
url_or_none,
)
class CineverseBaseIE(InfoExtractor):
_VALID_URL_BASE = r'https://www\.(?P<host>%s)' % '|'.join(map(re.escape, (
'cineverse.com',
'asiancrush.com',
'dovechannel.com',
'screambox.com',
'midnightpulp.com',
'fandor.com',
'retrocrush.tv',
)))
class CineverseIE(CineverseBaseIE):
_VALID_URL = rf'{CineverseBaseIE._VALID_URL_BASE}/watch/(?P<id>[A-Z0-9]+)'
_TESTS = [{
'url': 'https://www.asiancrush.com/watch/DMR00018919/Women-Who-Flirt',
'skip': 'geo-blocked',
'info_dict': {
'title': 'Women Who Flirt',
'ext': 'mp4',
'id': 'DMR00018919',
'modified_timestamp': 1678744575289,
'cast': ['Xun Zhou', 'Xiaoming Huang', 'Yi-Lin Sie', 'Sonia Sui', 'Quniciren'],
'duration': 5811.597,
'description': 'md5:892fd62a05611d394141e8394ace0bc6',
'age_limit': 13,
}
}, {
'url': 'https://www.retrocrush.tv/watch/1000000023016/Archenemy! Crystal Bowie',
'skip': 'geo-blocked',
'info_dict': {
'title': 'Archenemy! Crystal Bowie',
'ext': 'mp4',
'id': '1000000023016',
'episode_number': 3,
'season_number': 1,
'cast': ['Nachi Nozawa', 'Yoshiko Sakakibara', 'Toshiko Fujita'],
'age_limit': 0,
'episode': 'Episode 3',
'season': 'Season 1',
'duration': 1485.067,
'description': 'Cobra meets a beautiful bounty hunter by the name of Jane Royal.',
'series': 'Space Adventure COBRA (Original Japanese)',
}
}]
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, default={})
self._initialize_geo_bypass({
'countries': smuggled_data.get('geo_countries'),
})
video_id = self._match_id(url)
html = self._download_webpage(url, video_id)
idetails = self._search_nextjs_data(html, video_id)['props']['pageProps']['idetails']
if idetails.get('err_code') == 1200:
self.raise_geo_restricted(
'This video is not available from your location due to geo restriction. '
'You may be able to bypass it by using the /details/ page instead of the /watch/ page',
countries=smuggled_data.get('geo_countries'))
return {
'subtitles': filter_dict({
'en': traverse_obj(idetails, (('cc_url_vtt', 'subtitle_url'), {'url': {url_or_none}})) or None,
}),
'formats': self._extract_m3u8_formats(idetails['url'], video_id),
**traverse_obj(idetails, {
'title': 'title',
'id': ('details', 'item_id'),
'description': ('details', 'description'),
'duration': ('duration', {lambda x: x / 1000}),
'cast': ('details', 'cast', {lambda x: x.split(', ')}),
'modified_timestamp': ('details', 'updated_by', 0, 'update_time', 'time', {int_or_none}),
'season_number': ('details', 'season', {int_or_none}),
'episode_number': ('details', 'episode', {int_or_none}),
'age_limit': ('details', 'rating_code', {parse_age_limit}),
'series': ('details', 'series_details', 'title'),
}),
}
class CineverseDetailsIE(CineverseBaseIE):
_VALID_URL = rf'{CineverseBaseIE._VALID_URL_BASE}/details/(?P<id>[A-Z0-9]+)'
_TESTS = [{
'url': 'https://www.retrocrush.tv/details/1000000023012/Space-Adventure-COBRA-(Original-Japanese)',
'playlist_mincount': 30,
'info_dict': {
'title': 'Space Adventure COBRA (Original Japanese)',
'id': '1000000023012',
}
}, {
'url': 'https://www.asiancrush.com/details/NNVG4938/Hansel-and-Gretel',
'info_dict': {
'id': 'NNVG4938',
'ext': 'mp4',
'title': 'Hansel and Gretel',
'description': 'md5:e3e4c35309c2e82aee044f972c2fb05d',
'cast': ['Jeong-myeong Cheon', 'Eun Won-jae', 'Shim Eun-gyeong', 'Ji-hee Jin', 'Hee-soon Park', 'Lydia Park', 'Kyeong-ik Kim'],
'duration': 7030.732,
},
}]
def _real_extract(self, url):
host, series_id = self._match_valid_url(url).group('host', 'id')
html = self._download_webpage(url, series_id)
pageprops = self._search_nextjs_data(html, series_id)['props']['pageProps']
geo_countries = traverse_obj(pageprops, ('itemDetailsData', 'geo_country', {lambda x: x.split(', ')}))
geoblocked = traverse_obj(pageprops, (
'itemDetailsData', 'playback_err_msg')) == 'This title is not available in your location.'
def item_result(item):
item_url = f'https://www.{host}/watch/{item["item_id"]}/{item["title"]}'
if geoblocked:
item_url = smuggle_url(item_url, {'geo_countries': geo_countries})
return self.url_result(item_url, CineverseIE)
season = traverse_obj(pageprops, ('seasonEpisodes', ..., 'episodes', lambda _, v: v['item_id'] and v['title']))
if season:
return self.playlist_result([item_result(ep) for ep in season], playlist_id=series_id,
playlist_title=traverse_obj(pageprops, ('itemDetailsData', 'title')))
return item_result(pageprops['itemDetailsData'])
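smuggle_url/unsmuggle_url carry the geo_countries hint from the details page to the watch-page extractor inside the URL fragment; a round-trip sketch:

from yt_dlp.utils import smuggle_url, unsmuggle_url

url = smuggle_url('https://www.retrocrush.tv/watch/1000000023016/Title',
                  {'geo_countries': ['US', 'CA']})
clean_url, data = unsmuggle_url(url, default={})
print(clean_url)  # original URL restored
print(data)       # {'geo_countries': ['US', 'CA']}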


@@ -33,7 +33,7 @@ class CiscoWebexIE(InfoExtractor):
if rcid:
webpage = self._download_webpage(url, None, note='Getting video ID')
url = self._search_regex(self._VALID_URL, webpage, 'redirection url', group='url')
url = self._request_webpage(url, None, note='Resolving final URL').geturl()
url = self._request_webpage(url, None, note='Resolving final URL').url
mobj = self._match_valid_url(url)
subdomain = mobj.group('subdomain')
siteurl = mobj.group('siteurl_1') or mobj.group('siteurl_2')
@@ -49,7 +49,7 @@ class CiscoWebexIE(InfoExtractor):
'https://%s.webex.com/webappng/api/v1/recordings/%s/stream' % (subdomain, video_id),
video_id, headers=headers, query={'siteurl': siteurl}, expected_status=(403, 429))
if urlh.getcode() == 403:
if urlh.status == 403:
if stream['code'] == 53004:
self.raise_login_required()
if stream['code'] == 53005:
@@ -59,7 +59,7 @@ class CiscoWebexIE(InfoExtractor):
'This video is protected by a password, use the --video-password option', expected=True)
raise ExtractorError(f'{self.IE_NAME} said: {stream["code"]} - {stream["message"]}', expected=True)
if urlh.getcode() == 429:
if urlh.status == 429:
self.raise_login_required(
f'{self.IE_NAME} asks you to solve a CAPTCHA. Solve CAPTCHA in browser and',
method='cookies')

View File

@@ -19,6 +19,7 @@ class CNBCIE(InfoExtractor):
# m3u8 download
'skip_download': True,
},
'skip': 'Dead link',
}
def _real_extract(self, url):
@@ -49,6 +50,7 @@ class CNBCVideoIE(InfoExtractor):
'params': {
'skip_download': True,
},
'skip': 'Dead link',
}
def _real_extract(self, url):

View File

@@ -17,15 +17,26 @@ import subprocess
import sys
import time
import types
import urllib.error
import urllib.parse
import urllib.request
import xml.etree.ElementTree
from ..compat import functools # isort: split
from ..compat import compat_etree_fromstring, compat_expanduser, compat_os_name
from ..compat import (
compat_etree_fromstring,
compat_expanduser,
compat_os_name,
urllib_req_to_req,
)
from ..cookies import LenientSimpleCookie
from ..downloader.f4m import get_base_url, remove_encrypted_media
from ..downloader.hls import HlsFD
from ..networking import HEADRequest, Request
from ..networking.exceptions import (
HTTPError,
IncompleteRead,
network_exceptions,
)
from ..utils import (
IDENTITY,
JSON_LD_RE,
@@ -34,7 +45,6 @@ from ..utils import (
FormatSorter,
GeoRestrictedError,
GeoUtils,
HEADRequest,
LenientJSONDecoder,
Popen,
RegexNotFoundError,
@@ -60,7 +70,6 @@ from ..utils import (
js_to_json,
mimetype2ext,
netrc_from_content,
network_exceptions,
orderedSet,
parse_bitrate,
parse_codecs,
@@ -70,7 +79,6 @@ from ..utils import (
parse_resolution,
sanitize_filename,
sanitize_url,
sanitized_Request,
smuggle_url,
str_or_none,
str_to_int,
@@ -82,8 +90,6 @@ from ..utils import (
unescapeHTML,
unified_strdate,
unified_timestamp,
update_Request,
update_url_query,
url_basename,
url_or_none,
urlhandle_detect_ext,
@@ -224,7 +230,8 @@ class InfoExtractor:
width : height ratio as float.
* no_resume The server does not support resuming the
(HTTP or RTMP) download. Boolean.
* has_drm The format has DRM and cannot be downloaded. Boolean
* has_drm True if the format has DRM and cannot be downloaded.
'maybe' if the format may have DRM and has to be tested before download.
* extra_param_to_segment_url A query string to append to each
fragment's URL, or to update each existing query string
with. Only applied by the native HLS/DASH downloaders.
@@ -722,11 +729,11 @@ class InfoExtractor:
except UnsupportedError:
raise
except ExtractorError as e:
e.video_id = e.video_id or self.get_temp_id(url),
e.video_id = e.video_id or self.get_temp_id(url)
e.ie = e.ie or self.IE_NAME,
e.traceback = e.traceback or sys.exc_info()[2]
raise
except http.client.IncompleteRead as e:
except IncompleteRead as e:
raise ExtractorError('A network error has occurred.', cause=e, expected=True, video_id=self.get_temp_id(url))
except (KeyError, StopIteration) as e:
raise ExtractorError('An extractor error has occurred.', cause=e, video_id=self.get_temp_id(url))
@@ -785,20 +792,25 @@ class InfoExtractor:
@staticmethod
def __can_accept_status_code(err, expected_status):
assert isinstance(err, urllib.error.HTTPError)
assert isinstance(err, HTTPError)
if expected_status is None:
return False
elif callable(expected_status):
return expected_status(err.code) is True
return expected_status(err.status) is True
else:
return err.code in variadic(expected_status)
return err.status in variadic(expected_status)
def _create_request(self, url_or_request, data=None, headers=None, query=None):
if isinstance(url_or_request, urllib.request.Request):
return update_Request(url_or_request, data=data, headers=headers, query=query)
if query:
url_or_request = update_url_query(url_or_request, query)
return sanitized_Request(url_or_request, data, headers or {})
self._downloader.deprecation_warning(
'Passing a urllib.request.Request to _create_request() is deprecated. '
'Use yt_dlp.networking.common.Request instead.')
url_or_request = urllib_req_to_req(url_or_request)
elif not isinstance(url_or_request, Request):
url_or_request = Request(url_or_request)
url_or_request.update(data=data, headers=headers, query=query)
return url_or_request
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None, expected_status=None):
"""
@@ -834,14 +846,9 @@ class InfoExtractor:
try:
return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query))
except network_exceptions as err:
if isinstance(err, urllib.error.HTTPError):
if isinstance(err, HTTPError):
if self.__can_accept_status_code(err, expected_status):
# Retain reference to error to prevent file object from
# being closed before it can be read. Works around the
# effects of <https://bugs.python.org/issue15002>
# introduced in Python 3.4.1.
err.fp._error = err
return err.fp
return err.response
if errnote is False:
return False
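
The rewritten _create_request above funnels every input into the new networking.Request class instead of building urllib requests. A small sketch of that object's API, assuming a yt-dlp build that includes the networking framework:

from yt_dlp.networking import Request

req = Request('https://example.com/api')  # a bare URL becomes a Request
req.update(query={'page': '2'}, headers={'X-Debug': '1'}, data=b'{}')
print(req.url)     # https://example.com/api?page=2
print(req.method)  # POST, implied by the presence of a body
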
@@ -973,11 +980,11 @@ class InfoExtractor:
if prefix is not None:
webpage_bytes = prefix + webpage_bytes
if self.get_param('dump_intermediate_pages', False):
self.to_screen('Dumping request to ' + urlh.geturl())
self.to_screen('Dumping request to ' + urlh.url)
dump = base64.b64encode(webpage_bytes).decode('ascii')
self._downloader.to_screen(dump)
if self.get_param('write_pages'):
filename = self._request_dump_filename(urlh.geturl(), video_id)
filename = self._request_dump_filename(urlh.url, video_id)
self.to_screen(f'Saving request to {filename}')
with open(filename, 'wb') as outf:
outf.write(webpage_bytes)
@@ -1035,7 +1042,7 @@ class InfoExtractor:
fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
if self.get_param('load_pages'):
url_or_request = self._create_request(url_or_request, data, headers, query)
filename = self._request_dump_filename(url_or_request.full_url, video_id)
filename = self._request_dump_filename(url_or_request.url, video_id)
self.to_screen(f'Loading request from {filename}')
try:
with open(filename, 'rb') as dumpf:
@@ -1109,7 +1116,7 @@ class InfoExtractor:
while True:
try:
return self.__download_webpage(url_or_request, video_id, note, errnote, None, fatal, *args, **kwargs)
except http.client.IncompleteRead as e:
except IncompleteRead as e:
try_count += 1
if try_count >= tries:
raise e
@@ -1680,7 +1687,7 @@ class InfoExtractor:
def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
"""Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
rectx = re.escape(context_name)
FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){.*?\breturn\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
js, arg_keys, arg_vals = self._search_regex(
(rf'<script>\s*window\.{rectx}={FUNCTION_RE}\s*\)\s*;?\s*</script>', rf'{rectx}\(.*?{FUNCTION_RE}'),
webpage, context_name, group=('js', 'arg_keys', 'arg_vals'),
@@ -1806,7 +1813,7 @@ class InfoExtractor:
return []
manifest, urlh = res
manifest_url = urlh.geturl()
manifest_url = urlh.url
return self._parse_f4m_formats(
manifest, manifest_url, video_id, preference=preference, quality=quality, f4m_id=f4m_id,
@@ -1965,7 +1972,7 @@ class InfoExtractor:
return [], {}
m3u8_doc, urlh = res
m3u8_url = urlh.geturl()
m3u8_url = urlh.url
return self._parse_m3u8_formats_and_subtitles(
m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
@@ -1979,11 +1986,7 @@ class InfoExtractor:
errnote=None, fatal=True, data=None, headers={}, query={},
video_id=None):
formats, subtitles = [], {}
has_drm = re.search('|'.join([
r'#EXT-X-FAXS-CM:', # Adobe Flash Access
r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay
]), m3u8_doc)
has_drm = HlsFD._has_drm(m3u8_doc)
def format_url(url):
return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url)
@@ -2245,18 +2248,10 @@ class InfoExtractor:
if res is False:
assert not fatal
return [], {}
smil, urlh = res
smil_url = urlh.geturl()
namespace = self._parse_smil_namespace(smil)
fmts = self._parse_smil_formats(
smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
subs = self._parse_smil_subtitles(
smil, namespace=namespace)
return fmts, subs
return self._parse_smil_formats_and_subtitles(smil, urlh.url, video_id, f4m_params=f4m_params,
namespace=self._parse_smil_namespace(smil))
def _extract_smil_formats(self, *args, **kwargs):
fmts, subs = self._extract_smil_formats_and_subtitles(*args, **kwargs)
@@ -2270,7 +2265,7 @@ class InfoExtractor:
return {}
smil, urlh = res
smil_url = urlh.geturl()
smil_url = urlh.url
return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params)
@@ -2282,9 +2277,8 @@ class InfoExtractor:
def _parse_smil(self, smil, smil_url, video_id, f4m_params=None):
namespace = self._parse_smil_namespace(smil)
formats = self._parse_smil_formats(
formats, subtitles = self._parse_smil_formats_and_subtitles(
smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
subtitles = self._parse_smil_subtitles(smil, namespace=namespace)
video_id = os.path.splitext(url_basename(smil_url))[0]
title = None
@@ -2323,7 +2317,14 @@ class InfoExtractor:
return self._search_regex(
r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None)
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
def _parse_smil_formats(self, *args, **kwargs):
fmts, subs = self._parse_smil_formats_and_subtitles(*args, **kwargs)
if subs:
self._report_ignoring_subs('SMIL')
return fmts
def _parse_smil_formats_and_subtitles(
self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
base = smil_url
for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
b = meta.get('base') or meta.get('httpBase')
@@ -2331,7 +2332,7 @@ class InfoExtractor:
base = b
break
formats = []
formats, subtitles = [], {}
rtmp_count = 0
http_count = 0
m3u8_count = 0
@@ -2379,8 +2380,9 @@ class InfoExtractor:
src_url = src_url.strip()
if proto == 'm3u8' or src_ext == 'm3u8':
m3u8_formats = self._extract_m3u8_formats(
m3u8_formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False)
self._merge_subtitles(m3u8_subs, target=subtitles)
if len(m3u8_formats) == 1:
m3u8_count += 1
m3u8_formats[0].update({
@@ -2401,11 +2403,15 @@ class InfoExtractor:
f4m_url += urllib.parse.urlencode(f4m_params)
formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
elif src_ext == 'mpd':
formats.extend(self._extract_mpd_formats(
src_url, video_id, mpd_id='dash', fatal=False))
mpd_formats, mpd_subs = self._extract_mpd_formats_and_subtitles(
src_url, video_id, mpd_id='dash', fatal=False)
formats.extend(mpd_formats)
self._merge_subtitles(mpd_subs, target=subtitles)
elif re.search(r'\.ism/[Mm]anifest', src_url):
formats.extend(self._extract_ism_formats(
src_url, video_id, ism_id='mss', fatal=False))
ism_formats, ism_subs = self._extract_ism_formats_and_subtitles(
src_url, video_id, ism_id='mss', fatal=False)
formats.extend(ism_formats)
self._merge_subtitles(ism_subs, target=subtitles)
elif src_url.startswith('http') and self._is_valid_url(src, video_id):
http_count += 1
formats.append({
@@ -2436,7 +2442,10 @@ class InfoExtractor:
'format_note': 'SMIL storyboards',
})
return formats
smil_subs = self._parse_smil_subtitles(smil, namespace=namespace)
self._merge_subtitles(smil_subs, target=subtitles)
return formats, subtitles
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
urls = []
@@ -2462,7 +2471,7 @@ class InfoExtractor:
return []
xspf, urlh = res
xspf_url = urlh.geturl()
xspf_url = urlh.url
return self._parse_xspf(
xspf, playlist_id, xspf_url=xspf_url,
@@ -2533,7 +2542,7 @@ class InfoExtractor:
return [], {}
# We could have been redirected to a new url when we retrieved our mpd file.
mpd_url = urlh.geturl()
mpd_url = urlh.url
mpd_base_url = base_url(mpd_url)
return self._parse_mpd_formats_and_subtitles(
@@ -2919,7 +2928,7 @@ class InfoExtractor:
if ism_doc is None:
return [], {}
return self._parse_ism_formats_and_subtitles(ism_doc, urlh.geturl(), ism_id)
return self._parse_ism_formats_and_subtitles(ism_doc, urlh.url, ism_id)
def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
"""

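Most of the mechanical churn in this file follows one migration: urllib.error.HTTPError becomes yt_dlp.networking.exceptions.HTTPError, .code becomes .status, the .fp file object becomes .response, and urlh.geturl() becomes urlh.url. A sketch of the resulting error-handling idiom, as a standalone helper rather than the exact upstream code:

from yt_dlp.networking.exceptions import HTTPError

def describe_cause(e):
    # e is an ExtractorError whose .cause may be a new-style HTTPError
    if isinstance(e.cause, HTTPError):
        body = e.cause.response.read().decode('utf-8', 'replace')
        return f'HTTP {e.cause.status} from {e.cause.response.url}: {body[:200]}'
    return str(e)
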
View File

@@ -41,7 +41,7 @@ class CorusIE(ThePlatformFeedIE): # XXX: Do not subclass from concrete IE
)
'''
_TESTS = [{
'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/',
'url': 'https://www.hgtv.ca/video/bryan-inc/movie-night-popcorn-with-bryan/870923331648/',
'info_dict': {
'id': '870923331648',
'ext': 'mp4',
@@ -54,6 +54,7 @@ class CorusIE(ThePlatformFeedIE): # XXX: Do not subclass from concrete IE
'skip_download': True,
},
'expected_warnings': ['Failed to parse JSON'],
# FIXME: yt-dlp wrongly raises for geo restriction
}, {
'url': 'http://www.foodnetwork.ca/shows/chopped/video/episode/chocolate-obsession/video.html?v=872683587753',
'only_matching': True,

View File

@@ -4,7 +4,7 @@ import re
import time
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..networking.exceptions import HTTPError
from ..utils import (
determine_ext,
float_or_none,
@@ -113,7 +113,7 @@ class CrackleIE(InfoExtractor):
errnote='Unable to download media JSON')
except ExtractorError as e:
# 401 means geo restriction, trying next country
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
continue
raise

View File

@@ -1,7 +1,7 @@
import base64
import urllib.error
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
float_or_none,
@@ -27,11 +27,24 @@ class CrunchyrollBaseIE(InfoExtractor):
_AUTH_HEADERS = None
_API_ENDPOINT = None
_BASIC_AUTH = None
_QUERY = {}
_CLIENT_ID = ('cr_web', 'noaihdevm_6iyg0a8l0q')
_LOCALE_LOOKUP = {
'ar': 'ar-SA',
'de': 'de-DE',
'': 'en-US',
'es': 'es-419',
'es-es': 'es-ES',
'fr': 'fr-FR',
'it': 'it-IT',
'pt-br': 'pt-BR',
'pt-pt': 'pt-PT',
'ru': 'ru-RU',
'hi': 'hi-IN',
}
@property
def is_logged_in(self):
return self._get_cookies(self._BASE_URL).get('etp_rt')
return bool(self._get_cookies(self._BASE_URL).get('etp_rt'))
def _perform_login(self, username, password):
if self.is_logged_in:
@@ -62,49 +75,49 @@ class CrunchyrollBaseIE(InfoExtractor):
if not self.is_logged_in:
raise ExtractorError('Login succeeded but did not set etp_rt cookie')
def _update_query(self, lang):
if lang in CrunchyrollBaseIE._QUERY:
return
webpage = self._download_webpage(
f'{self._BASE_URL}/{lang}', None, note=f'Retrieving main page (lang={lang or None})')
initial_state = self._search_json(r'__INITIAL_STATE__\s*=', webpage, 'initial state', None)
CrunchyrollBaseIE._QUERY[lang] = traverse_obj(initial_state, {
'locale': ('localization', 'locale'),
}) or None
if CrunchyrollBaseIE._BASIC_AUTH:
return
app_config = self._search_json(r'__APP_CONFIG__\s*=', webpage, 'app config', None)
cx_api_param = app_config['cxApiParams']['accountAuthClientId' if self.is_logged_in else 'anonClientId']
self.write_debug(f'Using cxApiParam={cx_api_param}')
CrunchyrollBaseIE._BASIC_AUTH = 'Basic ' + base64.b64encode(f'{cx_api_param}:'.encode()).decode()
def _update_auth(self):
if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_REFRESH > time_seconds():
return
assert CrunchyrollBaseIE._BASIC_AUTH, '_update_query needs to be called at least one time beforehand'
if not CrunchyrollBaseIE._BASIC_AUTH:
cx_api_param = self._CLIENT_ID[self.is_logged_in]
self.write_debug(f'Using cxApiParam={cx_api_param}')
CrunchyrollBaseIE._BASIC_AUTH = 'Basic ' + base64.b64encode(f'{cx_api_param}:'.encode()).decode()
grant_type = 'etp_rt_cookie' if self.is_logged_in else 'client_id'
auth_response = self._download_json(
f'{self._BASE_URL}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
headers={'Authorization': CrunchyrollBaseIE._BASIC_AUTH}, data=f'grant_type={grant_type}'.encode())
try:
auth_response = self._download_json(
f'{self._BASE_URL}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
headers={'Authorization': CrunchyrollBaseIE._BASIC_AUTH}, data=f'grant_type={grant_type}'.encode())
except ExtractorError as error:
if isinstance(error.cause, HTTPError) and error.cause.status == 403:
raise ExtractorError(
'Request blocked by Cloudflare; navigate to Crunchyroll in your browser, '
'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
'and your browser\'s User-Agent (with --user-agent)', expected=True)
raise
CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']}
CrunchyrollBaseIE._AUTH_REFRESH = time_seconds(seconds=traverse_obj(auth_response, ('expires_in', {float_or_none}), default=300) - 10)
def _locale_from_language(self, language):
config_locale = self._configuration_arg('metadata', ie_key=CrunchyrollBetaIE, casesense=True)
return config_locale[0] if config_locale else self._LOCALE_LOOKUP.get(language)
def _call_base_api(self, endpoint, internal_id, lang, note=None, query={}):
self._update_query(lang)
self._update_auth()
if not endpoint.startswith('/'):
endpoint = f'/{endpoint}'
query = query.copy()
locale = self._locale_from_language(lang)
if locale:
query['locale'] = locale
return self._download_json(
f'{self._BASE_URL}{endpoint}', internal_id, note or f'Calling API: {endpoint}',
headers=CrunchyrollBaseIE._AUTH_HEADERS, query={**CrunchyrollBaseIE._QUERY[lang], **query})
headers=CrunchyrollBaseIE._AUTH_HEADERS, query=query)
def _call_api(self, path, internal_id, lang, note='api', query={}):
if not path.startswith(f'/content/v2/{self._API_ENDPOINT}/'):
@@ -114,7 +127,7 @@ class CrunchyrollBaseIE(InfoExtractor):
result = self._call_base_api(
path, internal_id, lang, f'Downloading {note} JSON ({self._API_ENDPOINT})', query=query)
except ExtractorError as error:
if isinstance(error.cause, urllib.error.HTTPError) and error.cause.code == 404:
if isinstance(error.cause, HTTPError) and error.cause.status == 404:
return None
raise
@@ -206,7 +219,7 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
IE_NAME = 'crunchyroll'
_VALID_URL = r'''(?x)
https?://(?:beta\.|www\.)?crunchyroll\.com/
(?P<lang>(?:\w{2}(?:-\w{2})?/)?)
(?:(?P<lang>\w{2}(?:-\w{2})?)/)?
watch/(?!concert|musicvideo)(?P<id>\w+)'''
_TESTS = [{
# Premium only
@@ -304,7 +317,7 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
},
'playlist_mincount': 5,
}, {
'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y',
'url': 'https://www.crunchyroll.com/de/watch/GY2P1Q98Y',
'only_matching': True,
}, {
'url': 'https://beta.crunchyroll.com/pt-br/watch/G8WUN8VKP/the-ruler-of-conspiracy',
@@ -490,8 +503,21 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
_VALID_URL = r'''(?x)
https?://(?:www\.)?crunchyroll\.com/
(?P<lang>(?:\w{2}(?:-\w{2})?/)?)
watch/(?P<type>concert|musicvideo)/(?P<id>\w{10})'''
watch/(?P<type>concert|musicvideo)/(?P<id>\w+)'''
_TESTS = [{
'url': 'https://www.crunchyroll.com/de/watch/musicvideo/MV5B02C79',
'info_dict': {
'ext': 'mp4',
'id': 'MV5B02C79',
'display_id': 'egaono-hana',
'title': 'Egaono Hana',
'track': 'Egaono Hana',
'artist': 'Goose house',
'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
'genre': ['J-Pop'],
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C',
'info_dict': {
'ext': 'mp4',
@@ -519,11 +545,14 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C/crossing-field',
'url': 'https://www.crunchyroll.com/de/watch/musicvideo/MV5B02C79/egaono-hana',
'only_matching': True,
}, {
'url': 'https://www.crunchyroll.com/watch/concert/MC2E2AC135/live-is-smile-always-364joker-at-yokohama-arena',
'only_matching': True,
}, {
'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C/crossing-field',
'only_matching': True,
}]
_API_ENDPOINT = 'music'
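
The hardcoded _CLIENT_ID pair replaces scraping cxApiParams out of the webpage; the Basic credential is simply the client id as a username with an empty secret, per RFC 7617. For illustration, using the anonymous id from the tuple above:

import base64

client_id = 'cr_web'  # index 1, 'noaihdevm_6iyg0a8l0q', is used when logged in
print('Basic ' + base64.b64encode(f'{client_id}:'.encode()).decode())
# Basic Y3Jfd2ViOg==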

View File

@@ -1,10 +1,8 @@
import time
from .common import InfoExtractor
from ..utils import (
int_or_none,
HEADRequest,
)
from ..networking import HEADRequest
from ..utils import int_or_none
class CultureUnpluggedIE(InfoExtractor):

View File

@@ -1,9 +1,9 @@
import hashlib
import re
import time
import urllib.error
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
classproperty,
@@ -105,7 +105,7 @@ class DacastVODIE(DacastBaseIE):
formats = self._extract_m3u8_formats(hls_url, video_id, 'mp4', m3u8_id='hls')
except ExtractorError as e:
# CDN will randomly respond with 403
if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 403:
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
retry.error = e
continue
raise

View File

@@ -3,7 +3,7 @@ import json
import re
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
OnDemandPagedList,
@@ -68,9 +68,9 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
None, 'Downloading Access Token',
data=urlencode_postdata(data))['access_token']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
if isinstance(e.cause, HTTPError) and e.cause.status == 400:
raise ExtractorError(self._parse_json(
e.cause.read().decode(), xid)['error_description'], expected=True)
e.cause.response.read().decode(), xid)['error_description'], expected=True)
raise
self._set_dailymotion_cookie('access_token' if username else 'client_token', token)
self._HEADERS['Authorization'] = 'Bearer ' + token

View File

@@ -3,8 +3,8 @@ import string
from .discoverygo import DiscoveryGoBaseIE
from ..compat import compat_urllib_parse_unquote
from ..networking.exceptions import HTTPError
from ..utils import ExtractorError
from ..compat import compat_HTTPError
class DiscoveryIE(DiscoveryGoBaseIE):
@@ -100,9 +100,9 @@ class DiscoveryIE(DiscoveryGoBaseIE):
self._API_BASE_URL + 'streaming/video/' + video_id,
display_id, 'Downloading streaming JSON metadata', headers=headers)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403):
e_description = self._parse_json(
e.cause.read().decode(), display_id)['description']
e.cause.response.read().decode(), display_id)['description']
if 'resource not available for country' in e_description:
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
if 'Authorized Networks' in e_description:

View File

@@ -1,31 +1,72 @@
import time
import hashlib
import re
import urllib
import uuid
from .common import InfoExtractor
from .openload import PhantomJSwrapper
from ..utils import (
ExtractorError,
UserNotLive,
determine_ext,
int_or_none,
js_to_json,
parse_resolution,
str_or_none,
traverse_obj,
unescapeHTML,
unified_strdate,
url_or_none,
urlencode_postdata,
urljoin,
)
class DouyuTVIE(InfoExtractor):
IE_DESC = '斗鱼'
class DouyuBaseIE(InfoExtractor):
def _download_cryptojs_md5(self, video_id):
for url in [
'https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
'https://cdn.bootcdn.net/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
]:
js_code = self._download_webpage(
url, video_id, note='Downloading signing dependency', fatal=False)
if js_code:
self.cache.store('douyu', 'crypto-js-md5', js_code)
return js_code
raise ExtractorError('Unable to download JS dependency (crypto-js/md5)')
def _get_cryptojs_md5(self, video_id):
return self.cache.load('douyu', 'crypto-js-md5') or self._download_cryptojs_md5(video_id)
def _calc_sign(self, sign_func, video_id, a):
b = uuid.uuid4().hex
c = round(time.time())
js_script = f'{self._get_cryptojs_md5(video_id)};{sign_func};console.log(ub98484234("{a}","{b}","{c}"))'
phantom = PhantomJSwrapper(self)
result = phantom.execute(js_script, video_id,
note='Executing JS signing script').strip()
return {i: v[0] for i, v in urllib.parse.parse_qs(result).items()}
def _search_js_sign_func(self, webpage, fatal=True):
# The greedy look-behind ensures last possible script tag is matched
return self._search_regex(
r'(?:<script.*)?<script[^>]*>(.*?ub98484234.*?)</script>', webpage, 'JS sign func', fatal=fatal)
class DouyuTVIE(DouyuBaseIE):
IE_DESC = '斗鱼直播'
_VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(topic/\w+\?rid=|(?:[^/]+/))*(?P<id>[A-Za-z0-9]+)'
_TESTS = [{
'url': 'http://www.douyutv.com/iseven',
'url': 'https://www.douyu.com/pigff',
'info_dict': {
'id': '17732',
'display_id': 'iseven',
'ext': 'flv',
'title': 're:^清晨醒脑!根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': r're:.*m7show@163\.com.*',
'thumbnail': r're:^https?://.*\.png',
'uploader': '7师傅',
'id': '24422',
'display_id': 'pigff',
'ext': 'mp4',
'title': 're:^【PIGFF】.* [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': r'≥15级牌子看鱼吧置顶帖进粉丝vx群',
'thumbnail': str,
'uploader': 'pigff',
'is_live': True,
'live_status': 'is_live',
},
'params': {
'skip_download': True,
@@ -85,15 +126,43 @@ class DouyuTVIE(InfoExtractor):
'only_matching': True,
}]
def _get_sign_func(self, room_id, video_id):
return self._download_json(
f'https://www.douyu.com/swf_api/homeH5Enc?rids={room_id}', video_id,
note='Getting signing script')['data'][f'room{room_id}']
def _extract_stream_formats(self, stream_formats):
formats = []
for stream_info in traverse_obj(stream_formats, (..., 'data')):
stream_url = urljoin(
traverse_obj(stream_info, 'rtmp_url'), traverse_obj(stream_info, 'rtmp_live'))
if stream_url:
rate_id = traverse_obj(stream_info, ('rate', {int_or_none}))
rate_info = traverse_obj(stream_info, ('multirates', lambda _, v: v['rate'] == rate_id), get_all=False)
ext = determine_ext(stream_url)
formats.append({
'url': stream_url,
'format_id': str_or_none(rate_id),
'ext': 'mp4' if ext == 'm3u8' else ext,
'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
'quality': rate_id % -10000 if rate_id is not None else None,
**traverse_obj(rate_info, {
'format': ('name', {str_or_none}),
'tbr': ('bit', {int_or_none}),
}),
})
return formats
def _real_extract(self, url):
video_id = self._match_id(url)
if video_id.isdigit():
room_id = video_id
else:
page = self._download_webpage(url, video_id)
room_id = self._html_search_regex(
r'"room_id\\?"\s*:\s*(\d+),', page, 'room id')
webpage = self._download_webpage(url, video_id)
room_id = self._search_regex(r'\$ROOM\.room_id\s*=\s*(\d+)', webpage, 'room id')
if self._search_regex(r'"videoLoop"\s*:\s*(\d+)', webpage, 'loop', default='') == '1':
raise UserNotLive('The channel is auto-playing VODs', video_id=video_id)
if self._search_regex(r'\$ROOM\.show_status\s*=\s*(\d+)', webpage, 'status', default='') == '2':
raise UserNotLive(video_id=video_id)
# Grab metadata from API
params = {
@@ -102,110 +171,136 @@ class DouyuTVIE(InfoExtractor):
'time': int(time.time()),
}
params['auth'] = hashlib.md5(
f'room/{video_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'.encode()).hexdigest()
room = self._download_json(
f'room/{room_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'.encode()).hexdigest()
room = traverse_obj(self._download_json(
f'http://www.douyutv.com/api/v1/room/{room_id}', video_id,
note='Downloading room info', query=params)['data']
note='Downloading room info', query=params, fatal=False), 'data')
# 1 = live, 2 = offline
if room.get('show_status') == '2':
raise ExtractorError('Live stream is offline', expected=True)
if traverse_obj(room, 'show_status') == '2':
raise UserNotLive(video_id=video_id)
video_url = urljoin('https://hls3-akm.douyucdn.cn/', self._search_regex(r'(live/.*)', room['hls_url'], 'URL'))
formats, subs = self._extract_m3u8_formats_and_subtitles(video_url, room_id)
js_sign_func = self._search_js_sign_func(webpage, fatal=False) or self._get_sign_func(room_id, video_id)
form_data = {
'rate': 0,
**self._calc_sign(js_sign_func, video_id, room_id),
}
stream_formats = [self._download_json(
f'https://www.douyu.com/lapi/live/getH5Play/{room_id}',
video_id, note="Downloading livestream format",
data=urlencode_postdata(form_data))]
title = unescapeHTML(room['room_name'])
description = room.get('show_details')
thumbnail = room.get('room_src')
uploader = room.get('nickname')
for rate_id in traverse_obj(stream_formats[0], ('data', 'multirates', ..., 'rate')):
if rate_id != traverse_obj(stream_formats[0], ('data', 'rate')):
form_data['rate'] = rate_id
stream_formats.append(self._download_json(
f'https://www.douyu.com/lapi/live/getH5Play/{room_id}',
video_id, note=f'Downloading livestream format {rate_id}',
data=urlencode_postdata(form_data)))
return {
'id': room_id,
'display_id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'uploader': uploader,
'formats': self._extract_stream_formats(stream_formats),
'is_live': True,
'subtitles': subs,
'formats': formats,
**traverse_obj(room, {
'display_id': ('url', {str}, {lambda i: i[1:]}),
'title': ('room_name', {unescapeHTML}),
'description': ('show_details', {str}),
'uploader': ('nickname', {str}),
'thumbnail': ('room_src', {url_or_none}),
})
}
class DouyuShowIE(InfoExtractor):
class DouyuShowIE(DouyuBaseIE):
_VALID_URL = r'https?://v(?:mobile)?\.douyu\.com/show/(?P<id>[0-9a-zA-Z]+)'
_TESTS = [{
'url': 'https://v.douyu.com/show/rjNBdvnVXNzvE2yw',
'md5': '0c2cfd068ee2afe657801269b2d86214',
'url': 'https://v.douyu.com/show/mPyq7oVNe5Yv1gLY',
'info_dict': {
'id': 'rjNBdvnVXNzvE2yw',
'id': 'mPyq7oVNe5Yv1gLY',
'ext': 'mp4',
'title': '陈一发儿:砒霜 我有个室友系列04-01 22点场',
'duration': 7150.08,
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': '陈一发儿',
'uploader_id': 'XrZwYelr5wbK',
'uploader_url': 'https://v.douyu.com/author/XrZwYelr5wbK',
'upload_date': '20170402',
'title': '四川人小时候的味道“蒜苗回锅肉”,传统菜不能丢,要常做来吃',
'duration': 633,
'thumbnail': str,
'uploader': '美食作家王刚V',
'uploader_id': 'OVAO4NVx1m7Q',
'timestamp': 1661850002,
'upload_date': '20220830',
'view_count': int,
'tags': ['美食', '美食综合'],
},
}, {
'url': 'https://vmobile.douyu.com/show/rjNBdvnVXNzvE2yw',
'only_matching': True,
}]
_FORMATS = {
'super': '原画',
'high': '超清',
'normal': '高清',
}
_QUALITIES = {
'super': -1,
'high': -2,
'normal': -3,
}
_RESOLUTIONS = {
'super': '1920x1080',
'high': '1280x720',
'normal': '852x480',
}
def _real_extract(self, url):
url = url.replace('vmobile.', 'v.')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
room_info = self._parse_json(self._search_regex(
r'var\s+\$ROOM\s*=\s*({.+});', webpage, 'room info'), video_id)
video_info = self._search_json(
r'<script>\s*window\.\$DATA\s*=', webpage,
'video info', video_id, transform_source=js_to_json)
video_info = None
js_sign_func = self._search_js_sign_func(webpage)
form_data = {
'vid': video_id,
**self._calc_sign(js_sign_func, video_id, video_info['ROOM']['point_id']),
}
url_info = self._download_json(
'https://v.douyu.com/api/stream/getStreamUrl', video_id,
data=urlencode_postdata(form_data), note="Downloading video formats")
for trial in range(5):
# Sometimes Douyu rejects our request. Let's try it more times
try:
video_info = self._download_json(
'https://vmobile.douyu.com/video/getInfo', video_id,
query={'vid': video_id},
headers={
'Referer': url,
'x-requested-with': 'XMLHttpRequest',
})
break
except ExtractorError:
self._sleep(1, video_id)
if not video_info:
raise ExtractorError('Can\'t fetch video info')
formats = self._extract_m3u8_formats(
video_info['data']['video_url'], video_id,
entry_protocol='m3u8_native', ext='mp4')
upload_date = unified_strdate(self._html_search_regex(
r'<em>上传时间:</em><span>([^<]+)</span>', webpage,
'upload date', fatal=False))
uploader = uploader_id = uploader_url = None
mobj = re.search(
r'(?m)<a[^>]+href="/author/([0-9a-zA-Z]+)".+?<strong[^>]+title="([^"]+)"',
webpage)
if mobj:
uploader_id, uploader = mobj.groups()
uploader_url = urljoin(url, '/author/' + uploader_id)
formats = []
for name, url in traverse_obj(url_info, ('data', 'thumb_video', {dict.items}, ...)):
video_url = traverse_obj(url, ('url', {url_or_none}))
if video_url:
ext = determine_ext(video_url)
formats.append({
'format': self._FORMATS.get(name),
'format_id': name,
'url': video_url,
'quality': self._QUALITIES.get(name),
'ext': 'mp4' if ext == 'm3u8' else ext,
'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
**parse_resolution(self._RESOLUTIONS.get(name))
})
else:
self.to_screen(
f'"{self._FORMATS.get(name, name)}" format may require logging in. {self._login_hint()}')
return {
'id': video_id,
'title': room_info['name'],
'formats': formats,
'duration': room_info.get('duration'),
'thumbnail': room_info.get('pic'),
'upload_date': upload_date,
'uploader': uploader,
'uploader_id': uploader_id,
'uploader_url': uploader_url,
**traverse_obj(video_info, ('DATA', {
'title': ('content', 'title', {str}),
'uploader': ('content', 'author', {str}),
'uploader_id': ('content', 'up_id', {str_or_none}),
'duration': ('content', 'video_duration', {int_or_none}),
'thumbnail': ('content', 'video_pic', {url_or_none}),
'timestamp': ('content', 'create_time', {int_or_none}),
'view_count': ('content', 'view_num', {int_or_none}),
'tags': ('videoTag', ..., 'tagName', {str}),
}))
}
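
The signing flow above stitches a cached crypto-js md5 build onto the page's ub98484234 function, runs the result under PhantomJS, and gets a querystring back, which _calc_sign flattens into POST fields. A toy version of that final step, with made-up PhantomJS output:

import urllib.parse

result = 'v=220120230727&did=0123456789abcdef&tt=1696000000&sign=deadbeef'
fields = {k: v[0] for k, v in urllib.parse.parse_qs(result).items()}
print(fields['sign'])  # deadbeef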

View File

@@ -2,7 +2,7 @@ import json
import uuid
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..networking.exceptions import HTTPError
from ..utils import (
determine_ext,
ExtractorError,
@@ -39,7 +39,7 @@ class DPlayBaseIE(InfoExtractor):
return f'Bearer {token}'
def _process_errors(self, e, geo_countries):
info = self._parse_json(e.cause.read().decode('utf-8'), None)
info = self._parse_json(e.cause.response.read().decode('utf-8'), None)
error = info['errors'][0]
error_code = error.get('code')
if error_code == 'access.denied.geoblocked':
@@ -87,7 +87,7 @@ class DPlayBaseIE(InfoExtractor):
'include': 'images,primaryChannel,show,tags'
})
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
if isinstance(e.cause, HTTPError) and e.cause.status == 400:
self._process_errors(e, geo_countries)
raise
video_id = video['data']['id']
@@ -99,7 +99,7 @@ class DPlayBaseIE(InfoExtractor):
streaming = self._download_video_playback_info(
disco_base, video_id, headers)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
self._process_errors(e, geo_countries)
raise
for format_dict in streaming:
@@ -746,7 +746,7 @@ class MotorTrendIE(DiscoveryPlusBaseIE):
class MotorTrendOnDemandIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:www\.)?motortrendondemand\.com/detail' + DPlayBaseIE._PATH_REGEX
_VALID_URL = r'https?://(?:www\.)?motortrend(?:ondemand\.com|\.com/plus)/detail' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
'url': 'https://www.motortrendondemand.com/detail/wheelstanding-dump-truck-stubby-bobs-comeback/37699/784',
'info_dict': {
@@ -767,6 +767,25 @@ class MotorTrendOnDemandIE(DiscoveryPlusBaseIE):
'upload_date': '20140101',
'tags': [],
},
}, {
'url': 'https://www.motortrend.com/plus/detail/roadworthy-rescues-teaser-trailer/4922860/',
'info_dict': {
'id': '4922860',
'ext': 'mp4',
'title': 'Roadworthy Rescues | Teaser Trailer',
'description': 'Derek Bieri helps Freiburger and Finnegan with their \'68 big-block Dart.',
'display_id': 'roadworthy-rescues-teaser-trailer/4922860',
'creator': 'Originals',
'series': 'Roadworthy Rescues',
'thumbnail': r're:^https?://.+\.jpe?g$',
'upload_date': '20220907',
'timestamp': 1662523200,
'duration': 1066.356,
'tags': [],
},
}, {
'url': 'https://www.motortrend.com/plus/detail/ugly-duckling/2450033/12439',
'only_matching': True,
}]
_PRODUCT = 'MTOD'

View File

@@ -1,3 +1,4 @@
import base64
import os.path
import re
@@ -5,14 +6,13 @@ from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote
from ..utils import (
ExtractorError,
traverse_obj,
try_get,
update_url_query,
url_basename,
)
class DropboxIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dropbox[.]com/sh?/(?P<id>[a-zA-Z0-9]{15})/.*'
_VALID_URL = r'https?://(?:www\.)?dropbox\.com/(?:(?:e/)?scl/fi|sh?)/(?P<id>\w+)'
_TESTS = [
{
'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',
@@ -22,7 +22,16 @@ class DropboxIE(InfoExtractor):
'title': 'youtube-dl test video \'ä"BaW_jenozKc'
}
}, {
'url': 'https://www.dropbox.com/sh/662glsejgzoj9sr/AAByil3FGH9KFNZ13e08eSa1a/Pregame%20Ceremony%20Program%20PA%2020140518.m4v',
'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh',
'only_matching': True,
}, {
'url': 'https://www.dropbox.com/sh/2mgpiuq7kv8nqdf/AABy-fW4dkydT4GmWi2mdOUDa?dl=0&preview=Drone+Shot.mp4',
'only_matching': True,
}, {
'url': 'https://www.dropbox.com/scl/fi/r2kd2skcy5ylbbta5y1pz/DJI_0003.MP4?dl=0&rlkey=wcdgqangn7t3lnmmv6li9mu9h',
'only_matching': True,
}, {
'url': 'https://www.dropbox.com/e/scl/fi/r2kd2skcy5ylbbta5y1pz/DJI_0003.MP4?dl=0&rlkey=wcdgqangn7t3lnmmv6li9mu9h',
'only_matching': True,
},
]
@@ -53,16 +62,25 @@ class DropboxIE(InfoExtractor):
else:
raise ExtractorError('Password protected video, use --video-password <password>', expected=True)
info_json = self._search_json(r'InitReact\.mountComponent\(.*?,', webpage, 'mountComponent', video_id,
contains_pattern=r'{.+?"preview".+?}', end_pattern=r'\)')['props']
transcode_url = traverse_obj(info_json, ((None, 'preview'), 'file', 'preview', 'content', 'transcode_url'), get_all=False)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id)
formats, subtitles, has_anonymous_download = [], {}, False
for encoded in reversed(re.findall(r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage)):
decoded = base64.b64decode(encoded).decode('utf-8', 'ignore')
transcode_url = self._search_regex(
r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', decoded, 'transcode url', default=None)
if not transcode_url:
continue
formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id, 'mp4')
has_anonymous_download = self._search_regex(r'(anonymous:\tanonymous)', decoded, 'anonymous', default=False)
break
# downloads enabled we can get the original file
if 'anonymous' in (try_get(info_json, lambda x: x['sharePermission']['canDownloadRoles']) or []):
video_url = re.sub(r'[?&]dl=0', '', url)
video_url += ('?' if '?' not in video_url else '&') + 'dl=1'
formats.append({'url': video_url, 'format_id': 'original', 'format_note': 'Original', 'quality': 1})
if has_anonymous_download:
formats.append({
'url': update_url_query(url, {'dl': '1'}),
'format_id': 'original',
'format_note': 'Original',
'quality': 1
})
return {
'id': video_id,

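The new Dropbox flow no longer trusts the mountComponent JSON; it scans the base64 registerStreamedPrefetch payloads for a transcode m3u8 URL instead. A self-contained sketch of that scan against a fabricated page snippet:

import base64
import re

payload = base64.b64encode(b'\x12\n.https://example.com/transcode/playlist.m3u8\x08').decode()
webpage = f'registerStreamedPrefetch("k1", "{payload}")'  # fabricated

for encoded in re.findall(r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage):
    decoded = base64.b64decode(encoded).decode('utf-8', 'ignore')
    print(re.search(r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', decoded).group(1))
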
View File

@@ -2,7 +2,7 @@ import functools
import re
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
int_or_none,
@@ -111,8 +111,8 @@ class EaglePlatformIE(InfoExtractor):
response = super(EaglePlatformIE, self)._download_json(
url_or_request, video_id, *args, **kwargs)
except ExtractorError as ee:
if isinstance(ee.cause, compat_HTTPError):
response = self._parse_json(ee.cause.read().decode('utf-8'), video_id)
if isinstance(ee.cause, HTTPError):
response = self._parse_json(ee.cause.response.read().decode('utf-8'), video_id)
self._handle_error(response)
raise
return response

View File

@@ -1,10 +1,6 @@
from .common import InfoExtractor
from ..utils import (
float_or_none,
int_or_none,
parse_iso8601,
sanitized_Request,
)
from ..networking import Request
from ..utils import float_or_none, int_or_none, parse_iso8601
class EitbIE(InfoExtractor):
@@ -54,7 +50,7 @@ class EitbIE(InfoExtractor):
hls_url = media.get('HLS_SURL')
if hls_url:
request = sanitized_Request(
request = Request(
'http://mam.eitb.eus/mam/REST/ServiceMultiweb/DomainRestrictedSecurity/TokenAuth/',
headers={'Referer': url})
token_data = self._download_json(

yt_dlp/extractor/eplus.py (new file, 96 lines)
View File

@@ -0,0 +1,96 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
try_call,
unified_timestamp,
)
class EplusIbIE(InfoExtractor):
IE_NAME = 'eplus:inbound'
IE_DESC = 'e+ (イープラス) overseas'
_VALID_URL = r'https?://live\.eplus\.jp/ex/player\?ib=(?P<id>(?:\w|%2B|%2F){86}%3D%3D)'
_TESTS = [{
'url': 'https://live.eplus.jp/ex/player?ib=YEFxb3Vyc2Dombnjg7blkrLlrablnJLjgrnjgq%2Fjg7zjg6vjgqLjgqTjg4njg6vlkIzlpb3kvJpgTGllbGxhIQ%3D%3D',
'info_dict': {
'id': '354502-0001-002',
'title': 'LoveLive!Series Presents COUNTDOWN LoveLive! 2021→2022LIVE with a smile!【Streaming+(配信)】',
'live_status': 'was_live',
'release_date': '20211231',
'release_timestamp': 1640952000,
'description': str,
},
'params': {
'skip_download': True,
'ignore_no_formats_error': True,
},
'expected_warnings': [
'Could not find the playlist URL. This event may not be accessible',
'No video formats found!',
'Requested format is not available',
],
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
data_json = self._search_json(r'<script>\s*var app\s*=', webpage, 'data json', video_id)
delivery_status = data_json.get('delivery_status')
archive_mode = data_json.get('archive_mode')
release_timestamp = try_call(lambda: unified_timestamp(data_json['event_datetime']) - 32400)
release_timestamp_str = data_json.get('event_datetime_text') # JST
self.write_debug(f'delivery_status = {delivery_status}, archive_mode = {archive_mode}')
if delivery_status == 'PREPARING':
live_status = 'is_upcoming'
elif delivery_status == 'STARTED':
live_status = 'is_live'
elif delivery_status == 'STOPPED':
if archive_mode != 'ON':
raise ExtractorError(
'This event has ended and there is no archive for this event', expected=True)
live_status = 'post_live'
elif delivery_status == 'WAIT_CONFIRM_ARCHIVED':
live_status = 'post_live'
elif delivery_status == 'CONFIRMED_ARCHIVE':
live_status = 'was_live'
else:
self.report_warning(f'Unknown delivery_status {delivery_status}, treat it as a live')
live_status = 'is_live'
formats = []
m3u8_playlist_urls = self._search_json(
r'var listChannels\s*=', webpage, 'hls URLs', video_id, contains_pattern=r'\[.+\]', default=[])
if not m3u8_playlist_urls:
if live_status == 'is_upcoming':
self.raise_no_formats(
f'Could not find the playlist URL. This live event will begin at {release_timestamp_str} JST', expected=True)
else:
self.raise_no_formats(
'Could not find the playlist URL. This event may not be accessible', expected=True)
elif live_status == 'is_upcoming':
self.raise_no_formats(f'This live event will begin at {release_timestamp_str} JST', expected=True)
elif live_status == 'post_live':
self.raise_no_formats('This event has ended, and the archive will be available shortly', expected=True)
else:
for m3u8_playlist_url in m3u8_playlist_urls:
formats.extend(self._extract_m3u8_formats(m3u8_playlist_url, video_id))
# FIXME: HTTP request headers need to be updated to continue download
warning = 'Due to technical limitations, the download will be interrupted after one hour'
if live_status == 'is_live':
self.report_warning(warning)
elif live_status == 'was_live':
self.report_warning(f'{warning}. You can restart to continue the download')
return {
'id': data_json['app_id'],
'title': data_json.get('app_name'),
'formats': formats,
'live_status': live_status,
'description': data_json.get('content'),
'release_timestamp': release_timestamp,
}
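
The 32400 subtracted from event_datetime is the JST offset, 9 hours in seconds: the page reports Japan local time while release_timestamp wants a UTC epoch. Worked through against the test's expected value, with an assumed input format:

from datetime import datetime, timezone

jst_text = '2021/12/31 21:00'  # assumed shape of event_datetime
naive = datetime.strptime(jst_text, '%Y/%m/%d %H:%M')
print(int(naive.replace(tzinfo=timezone.utc).timestamp()) - 9 * 3600)
# 1640952000, i.e. 2021-12-31T12:00:00Z, matching release_timestamp above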

View File

@@ -52,7 +52,7 @@ class EpornerIE(InfoExtractor):
webpage, urlh = self._download_webpage_handle(url, display_id)
video_id = self._match_id(urlh.geturl())
video_id = self._match_id(urlh.url)
hash = self._search_regex(
r'hash\s*[:=]\s*["\']([\da-f]{32})', webpage, 'hash')

View File

@@ -0,0 +1,63 @@
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_iso8601,
str_or_none,
traverse_obj,
url_or_none,
)
class ErocastIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?erocast\.me/track/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://erocast.me/track/9787/f',
'md5': 'af63b91f5f231096aba54dd682abea3b',
'info_dict': {
'id': '9787',
'title': '[F4M] Your roommate, who is definitely not possessed by an alien, suddenly wants to fuck you',
'url': 'https://erocast.s3.us-east-2.wasabisys.com/1220419/track.m3u8',
'ext': 'm4a',
'age_limit': 18,
'release_timestamp': 1696178652,
'release_date': '20231001',
'modified_timestamp': int,
'modified_date': str,
'description': 'ExtraTerrestrial Tuesday!',
'uploader': 'clarissaisshy',
'uploader_id': '8113',
'uploader_url': 'https://erocast.me/clarissaisshy',
'thumbnail': 'https://erocast.s3.us-east-2.wasabisys.com/1220418/conversions/1696179247-lg.jpg',
'duration': 2307,
'view_count': int,
'comment_count': int,
'webpage_url': 'https://erocast.me/track/9787/f4m-your-roommate-who-is-definitely-not-possessed-by-an-alien-suddenly-wants-to-fuck-you',
}
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
data = self._search_json(
rf'<script>\s*var song_data_{video_id}\s*=', webpage, 'data', video_id, end_pattern=r'</script>')
return {
'id': video_id,
'formats': self._extract_m3u8_formats(
data.get('file_url') or data['stream_url'], video_id, 'm4a', m3u8_id='hls'),
'age_limit': 18,
**traverse_obj(data, {
'title': ('title', {str}),
'description': ('description', {str}),
'release_timestamp': ('created_at', {parse_iso8601}),
'modified_timestamp': ('updated_at', {parse_iso8601}),
'uploader': ('user', 'name', {str}),
'uploader_id': ('user', 'id', {str_or_none}),
'uploader_url': ('user', 'permalink_url', {url_or_none}),
'thumbnail': ('artwork_url', {url_or_none}),
'duration': ('duration', {int_or_none}),
'view_count': ('plays', {int_or_none}),
'comment_count': ('comment_count', {int_or_none}),
'webpage_url': ('permalink_url', {url_or_none}),
}),
}
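
The {str} and {int_or_none} entries in these traverse_obj templates are filters: a set holding a type keeps values by isinstance, a set holding a callable maps the value through it, and keys whose path resolves to None are dropped from the result. A tiny sketch with data shaped like the test above:

from yt_dlp.utils import int_or_none, str_or_none, traverse_obj

data = {'user': {'id': 8113, 'name': 'clarissaisshy'}, 'duration': '2307', 'plays': None}
print(traverse_obj(data, {
    'uploader': ('user', 'name', {str}),
    'uploader_id': ('user', 'id', {str_or_none}),
    'duration': ('duration', {int_or_none}),
    'view_count': ('plays', {int_or_none}),
}))
# {'uploader': 'clarissaisshy', 'uploader_id': '8113', 'duration': 2307}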

View File

@@ -41,7 +41,7 @@ class EttuTvIE(InfoExtractor):
'device': 'desktop',
})
stream_response = self._download_json(player_settings['streamAccess'], video_id, data={})
stream_response = self._download_json(player_settings['streamAccess'], video_id, data=b'')
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
stream_response['data']['stream'], video_id, 'mp4')

View File

@@ -11,8 +11,8 @@ class ExpressenIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
(?:www\.)?(?:expressen|di)\.se/
(?:(?:tvspelare/video|videoplayer/embed)/)?
tv/(?:[^/]+/)*
(?:(?:tvspelare/video|video-?player/embed)/)?
(?:tv|nyheter)/(?:[^/?#]+/)*
(?P<id>[^/?#&]+)
'''
_EMBED_REGEX = [r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1']
@@ -42,6 +42,12 @@ class ExpressenIE(InfoExtractor):
}, {
'url': 'https://www.di.se/videoplayer/embed/tv/ditv/borsmorgon/implantica-rusar-70--under-borspremiaren-hor-styrelsemedlemmen/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
'only_matching': True,
}, {
'url': 'https://www.expressen.se/video-player/embed/tv/nyheter/ekero-fodda-olof-gustafsson-forvaltar-knarkbaronen-pablo-escobars-namn',
'only_matching': True,
}, {
'url': 'https://www.expressen.se/nyheter/efter-egna-telefonbluffen-escobar-stammer-klarna/',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@@ -8,6 +8,8 @@ from ..compat import (
compat_str,
compat_urllib_parse_unquote,
)
from ..networking import Request
from ..networking.exceptions import network_exceptions
from ..utils import (
ExtractorError,
clean_html,
@@ -19,11 +21,10 @@ from ..utils import (
int_or_none,
js_to_json,
merge_dicts,
network_exceptions,
parse_count,
parse_qs,
qualities,
sanitized_Request,
str_or_none,
traverse_obj,
try_get,
url_or_none,
@@ -73,6 +74,22 @@ class FacebookIE(InfoExtractor):
_VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary'
_TESTS = [{
'url': 'https://www.facebook.com/radiokicksfm/videos/3676516585958356/',
'info_dict': {
'id': '3676516585958356',
'ext': 'mp4',
'title': 'dr Adam Przygoda',
'description': 'md5:34675bda53336b1d16400265c2bb9b3b',
'uploader': 'RADIO KICKS FM',
'upload_date': '20230818',
'timestamp': 1692346159,
'thumbnail': r're:^https?://.*',
'uploader_id': '100063551323670',
'duration': 3132.184,
'view_count': int,
'concurrent_view_count': 0,
},
}, {
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
'md5': '6a40d33c0eccbb1af76cf0485a052659',
'info_dict': {
@@ -90,16 +107,16 @@ class FacebookIE(InfoExtractor):
'info_dict': {
'id': '274175099429670',
'ext': 'mp4',
'title': 'Asif Nawab Butt',
'description': 'Asif Nawab Butt',
'title': 'Asif',
'description': '',
'uploader': 'Asif Nawab Butt',
'upload_date': '20140506',
'timestamp': 1399398998,
'thumbnail': r're:^https?://.*',
'uploader_id': 'pfbid028wxorhX2ErLFJ578N6P3crHD3PHmXTCqCvfBpsnbSLmbokwSY75p5hWBjHGkG4zxl',
'duration': 131.03,
'concurrent_view_count': int,
},
'expected_warnings': [
'title'
]
}, {
'note': 'Video with DASH manifest',
'url': 'https://www.facebook.com/video.php?v=957955867617029',
@@ -151,7 +168,7 @@ class FacebookIE(InfoExtractor):
# have 1080P, but only up to 720p in swf params
# data.video.story.attachments[].media
'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
'md5': '3f3798adb2b73423263e59376f1f5eb7',
'md5': 'ca63897a90c9452efee5f8c40d080e25',
'info_dict': {
'id': '10155529876156509',
'ext': 'mp4',
@@ -162,6 +179,9 @@ class FacebookIE(InfoExtractor):
'uploader': 'CNN',
'thumbnail': r're:^https?://.*',
'view_count': int,
'uploader_id': '100059479812265',
'concurrent_view_count': int,
'duration': 44.478,
},
}, {
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
@@ -170,12 +190,16 @@ class FacebookIE(InfoExtractor):
'info_dict': {
'id': '1417995061575415',
'ext': 'mp4',
'title': 'Ukrainian Scientists Worldwide | Довгоочікуване відео',
'title': 'Довгоочікуване відео | By Yaroslav - Facebook',
'description': 'Довгоочікуване відео',
'timestamp': 1486648771,
'timestamp': 1486648217,
'upload_date': '20170209',
'uploader': 'Yaroslav Korpan',
'uploader_id': '100000948048708',
'uploader_id': 'pfbid06AScABAWcW91qpiuGrLt99Ef9tvwHoXP6t8KeFYEqkSfreMtfa9nTveh8b2ZEVSWl',
'concurrent_view_count': int,
'thumbnail': r're:^https?://.*',
'view_count': int,
'duration': 11736.446,
},
'params': {
'skip_download': True,
@@ -192,9 +216,7 @@ class FacebookIE(InfoExtractor):
'uploader': 'La Guía Del Varón',
'thumbnail': r're:^https?://.*',
},
'params': {
'skip_download': True,
},
'skip': 'Requires logging in',
}, {
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/',
@@ -208,9 +230,7 @@ class FacebookIE(InfoExtractor):
'uploader': 'Elisabeth Ahtn',
'uploader_id': '100013949973717',
},
'params': {
'skip_download': True,
},
'skip': 'Requires logging in',
}, {
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
'only_matching': True,
@@ -252,7 +272,11 @@ class FacebookIE(InfoExtractor):
'timestamp': 1527084179,
'upload_date': '20180523',
'uploader': 'ESL One Dota 2',
'uploader_id': '234218833769558',
'uploader_id': '100066514874195',
'duration': 4524.212,
'view_count': int,
'thumbnail': r're:^https?://.*',
'concurrent_view_count': int,
},
'params': {
'skip_download': True,
@@ -262,8 +286,17 @@ class FacebookIE(InfoExtractor):
'url': 'https://www.facebook.com/100033620354545/videos/106560053808006/',
'info_dict': {
'id': '106560053808006',
'ext': 'mp4',
'title': 'Josef',
'thumbnail': r're:^https?://.*',
'concurrent_view_count': int,
'uploader_id': 'pfbid0cibUN6tV7DYgdbJdsUFN46wc4jKpVSPAvJQhFofGqBGmVn3V3JtAs2tfUwziw2hUl',
'timestamp': 1549275572,
'duration': 3.413,
'uploader': 'Josef Novak',
'description': '',
'upload_date': '20190204',
},
'playlist_count': 2,
}, {
# data.video.story.attachments[].media
'url': 'https://www.facebook.com/watch/?v=647537299265662',
@@ -276,6 +309,7 @@ class FacebookIE(InfoExtractor):
'id': '10157667649866271',
},
'playlist_count': 3,
'skip': 'Requires logging in',
}, {
# data.nodes[].comet_sections.content.story.attachments[].style_type_renderer.attachment.media
'url': 'https://m.facebook.com/Alliance.Police.Department/posts/4048563708499330',
@@ -319,7 +353,7 @@ class FacebookIE(InfoExtractor):
}
def _perform_login(self, username, password):
login_page_req = sanitized_Request(self._LOGIN_URL)
login_page_req = Request(self._LOGIN_URL)
self._set_cookie('facebook.com', 'locale', 'en_US')
login_page = self._download_webpage(login_page_req, None,
note='Downloading login page',
@@ -340,8 +374,8 @@ class FacebookIE(InfoExtractor):
'timezone': '-60',
'trynum': '1',
}
request = sanitized_Request(self._LOGIN_URL, urlencode_postdata(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
request = Request(self._LOGIN_URL, urlencode_postdata(login_form))
request.headers['Content-Type'] = 'application/x-www-form-urlencoded'
try:
login_results = self._download_webpage(request, None,
note='Logging in', errnote='unable to fetch login page')
@@ -367,8 +401,8 @@ class FacebookIE(InfoExtractor):
'h': h,
'name_action_selected': 'dont_save',
}
check_req = sanitized_Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
check_req = Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
check_req.headers['Content-Type'] = 'application/x-www-form-urlencoded'
check_response = self._download_webpage(check_req, None,
note='Confirming login')
if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
@@ -383,9 +417,9 @@ class FacebookIE(InfoExtractor):
def extract_metadata(webpage):
post_data = [self._parse_json(j, video_id, fatal=False) for j in re.findall(
r'handleWithCustomApplyEach\(\s*ScheduledApplyEach\s*,\s*(\{.+?\})\s*\);', webpage)]
r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage)]
post = traverse_obj(post_data, (
..., 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
..., 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
media = traverse_obj(post, (..., 'attachments', ..., lambda k, v: (
k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict)
title = get_first(media, ('title', 'text'))
@@ -463,25 +497,25 @@ class FacebookIE(InfoExtractor):
dash_manifest = video.get('dash_manifest')
if dash_manifest:
formats.extend(self._parse_mpd_formats(
compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest))))
compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
mpd_url=video.get('dash_manifest_url')))
def process_formats(info):
# Downloads with browser's User-Agent are rate limited. Working around
# with non-browser User-Agent.
for f in info['formats']:
f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'
info['_format_sort_fields'] = ('res', 'quality')
def extract_relay_data(_filter):
return self._parse_json(self._search_regex(
r'handleWithCustomApplyEach\([^,]+,\s*({.*?%s.*?})\);' % _filter,
r'data-sjs>({.*?%s.*?})</script>' % _filter,
webpage, 'replay data', default='{}'), video_id, fatal=False) or {}
def extract_relay_prefetched_data(_filter):
replay_data = extract_relay_data(_filter)
for require in (replay_data.get('require') or []):
if require[0] == 'RelayPrefetchedStreamCache':
return try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {}
return traverse_obj(extract_relay_data(_filter), (
'require', (None, (..., ..., ..., '__bbox', 'require')),
lambda _, v: 'RelayPrefetchedStreamCache' in v, ..., ...,
'__bbox', 'result', 'data', {dict}), get_all=False) or {}
if not video_data:
server_js_data = self._parse_json(self._search_regex([
@@ -492,15 +526,23 @@ class FacebookIE(InfoExtractor):
if not video_data:
data = extract_relay_prefetched_data(
r'"(?:dash_manifest|playable_url(?:_quality_hd)?)"\s*:\s*"[^"]+"')
r'"(?:dash_manifest|playable_url(?:_quality_hd)?)')
if data:
entries = []
def parse_graphql_video(video):
v_id = video.get('videoId') or video.get('id') or video_id
reel_info = traverse_obj(
video, ('creation_story', 'short_form_video_context', 'playback_video', {dict}))
if reel_info:
video = video['creation_story']
video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner'))
video.update(reel_info)
formats = []
q = qualities(['sd', 'hd'])
for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'),
('playable_url_dash', '')):
('playable_url_dash', ''), ('browser_native_hd_url', 'hd'),
('browser_native_sd_url', 'sd')):
playable_url = video.get(key)
if not playable_url:
continue
@@ -509,19 +551,20 @@ class FacebookIE(InfoExtractor):
else:
formats.append({
'format_id': format_id,
'quality': q(format_id),
# sd, hd formats w/o resolution info should be deprioritized below DASH
'quality': q(format_id) - 3,
'url': playable_url,
})
extract_dash_manifest(video, formats)
v_id = video.get('videoId') or video.get('id') or video_id
info = {
'id': v_id,
'formats': formats,
'thumbnail': traverse_obj(
video, ('thumbnailImage', 'uri'), ('preferred_thumbnail', 'image', 'uri')),
'uploader_id': try_get(video, lambda x: x['owner']['id']),
'timestamp': int_or_none(video.get('publish_time')),
'duration': float_or_none(video.get('playable_duration_in_ms'), 1000),
'uploader_id': traverse_obj(video, ('owner', 'id', {str_or_none})),
'timestamp': traverse_obj(video, 'publish_time', 'creation_time', expected_type=int_or_none),
'duration': (float_or_none(video.get('playable_duration_in_ms'), 1000)
or float_or_none(video.get('length_in_second'))),
}
process_formats(info)
description = try_get(video, lambda x: x['savable_description']['text'])
@@ -676,9 +719,11 @@ class FacebookIE(InfoExtractor):
for src_type in ('src', 'src_no_ratelimit'):
src = f[0].get('%s_%s' % (quality, src_type))
if src:
preference = -10 if format_id == 'progressive' else -1
# sd, hd formats w/o resolution info should be deprioritized below DASH
# TODO: investigate if progressive or src formats still exist
preference = -10 if format_id == 'progressive' else -3
if quality == 'hd':
preference += 5
preference += 1
formats.append({
'format_id': '%s_%s_%s' % (format_id, quality, src_type),
'url': src,
@@ -782,18 +827,18 @@ class FacebookReelIE(InfoExtractor):
_TESTS = [{
'url': 'https://www.facebook.com/reel/1195289147628387',
'md5': 'c4ff9a7182ff9ff7d6f7a83603bae831',
'md5': 'f13dd37f2633595982db5ed8765474d3',
'info_dict': {
'id': '1195289147628387',
'ext': 'mp4',
'title': 'md5:9f5b142921b2dc57004fa13f76005f87',
'description': 'md5:24ea7ef062215d295bdde64e778f5474',
'uploader': 'Beast Camp Training',
'uploader_id': '1738535909799870',
'duration': 9.536,
'thumbnail': r're:^https?://.*',
'title': 'md5:b05800b5b1ad56c0ca78bd3807b6a61e',
'description': 'md5:22f03309b216ac84720183961441d8db',
'uploader': 'md5:723e6cb3091241160f20b3c5dc282af1',
'uploader_id': '100040874179269',
'duration': 9.579,
'timestamp': 1637502609,
'upload_date': '20211121',
'timestamp': 1637502604,
'thumbnail': r're:^https?://.*',
}
}]
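
The reworked traverse_obj path above is dense, so here is a minimal, self-contained sketch with invented data (real data-sjs payloads are far larger) of how it digs the RelayPrefetchedStreamCache result out of the nested require/__bbox structure:

from yt_dlp.utils import traverse_obj

relay_data = {  # hypothetical, trimmed shape of a data-sjs payload
    'require': [
        ['SomeOtherModule', None, None, [{'__bbox': {'require': []}}]],
        [None, None, None, [{'__bbox': {'require': [
            ['RelayPrefetchedStreamCache', None, None,
             [{'__bbox': {'result': {'data': {'video': {'id': '123'}}}}}]],
        ]}}]],
    ],
}

data = traverse_obj(relay_data, (
    'require', (None, (..., ..., ..., '__bbox', 'require')),
    lambda _, v: 'RelayPrefetchedStreamCache' in v, ..., ...,
    '__bbox', 'result', 'data', {dict}), get_all=False) or {}
print(data)  # {'video': {'id': '123'}}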

View File

@@ -3,11 +3,11 @@ import re
from .common import InfoExtractor
from ..compat import compat_parse_qs
from ..dependencies import websockets
from ..networking import Request
from ..utils import (
ExtractorError,
WebSocketsWrapper,
js_to_json,
sanitized_Request,
traverse_obj,
update_url_query,
urlencode_postdata,
@@ -57,7 +57,7 @@ class FC2IE(InfoExtractor):
}
login_data = urlencode_postdata(login_form_strs)
request = sanitized_Request(
request = Request(
'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data)
login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in')
@@ -66,7 +66,7 @@ class FC2IE(InfoExtractor):
return False
# this is also needed
login_redir = sanitized_Request('http://id.fc2.com/?mode=redirect&login=done')
login_redir = Request('http://id.fc2.com/?mode=redirect&login=done')
self._download_webpage(
login_redir, None, note='Login redirect', errnote='Login redirect failed')
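
The same migration pattern recurs here and in the facebook, gdcvault, hotnewhiphop and hrti hunks: yt_dlp.networking.Request replaces utils.sanitized_Request, and headers are assigned through the .headers mapping instead of add_header(). A minimal sketch, with a placeholder URL and form data:

from yt_dlp.networking import Request
from yt_dlp.utils import urlencode_postdata

req = Request('https://example.com/login', urlencode_postdata({'user': 'x'}))
req.headers['Content-Type'] = 'application/x-www-form-urlencoded'
# hand `req` to _download_webpage()/_download_json() exactly as before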

View File

@@ -1,8 +1,6 @@
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_HTTPError,
)
from ..compat import compat_str
from ..networking.exceptions import HTTPError
from ..utils import (
qualities,
strip_or_none,
@@ -40,8 +38,8 @@ class FilmOnIE(InfoExtractor):
'https://www.filmon.com/api/vod/movie?id=%s' % video_id,
video_id)['response']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError):
errmsg = self._parse_json(e.cause.read().decode(), video_id)['reason']
if isinstance(e.cause, HTTPError):
errmsg = self._parse_json(e.cause.response.read().decode(), video_id)['reason']
raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True)
raise
@@ -124,8 +122,8 @@ class FilmOnChannelIE(InfoExtractor):
channel_data = self._download_json(
'http://www.filmon.com/api-v2/channel/' + channel_id, channel_id)['data']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError):
errmsg = self._parse_json(e.cause.read().decode(), channel_id)['message']
if isinstance(e.cause, HTTPError):
errmsg = self._parse_json(e.cause.response.read().decode(), channel_id)['message']
raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True)
raise
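
This hunk shows the error-handling idiom the whole commit converges on: the new HTTPError from yt_dlp.networking.exceptions carries a Response object, so e.cause.code becomes e.cause.status and the body is read through e.cause.response. A sketch of the pattern, assuming `ie` is an InfoExtractor instance and the URL and field names are placeholders:

from yt_dlp.networking.exceptions import HTTPError
from yt_dlp.utils import ExtractorError

def fetch_api(ie, url, video_id):
    try:
        return ie._download_json(url, video_id)
    except ExtractorError as e:
        if isinstance(e.cause, HTTPError) and e.cause.status == 403:
            # the error body stays readable via the attached response
            msg = ie._parse_json(e.cause.response.read().decode(), video_id)['reason']
            raise ExtractorError(f'{ie.IE_NAME} said: {msg}', expected=True)
        raise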

View File

@@ -3,10 +3,10 @@ import uuid
from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
compat_str,
compat_urllib_parse_unquote,
)
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
int_or_none,
@@ -20,7 +20,7 @@ from ..utils import (
class FOXIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[\da-fA-F]+)'
_VALID_URL = r'https?://(?:www\.)?fox(?:sports)?\.com/(?:watch|replay)/(?P<id>[\da-fA-F]+)'
_TESTS = [{
# clip
'url': 'https://www.fox.com/watch/4b765a60490325103ea69888fb2bd4e8/',
@@ -50,6 +50,10 @@ class FOXIE(InfoExtractor):
# sports event, geo-restricted
'url': 'https://www.fox.com/watch/b057484dade738d1f373b3e46216fa2c/',
'only_matching': True,
}, {
# fox sports replay, geo-restricted
'url': 'https://www.foxsports.com/replay/561f3e071347a24e5e877abc56b22e89',
'only_matching': True,
}]
_GEO_BYPASS = False
_HOME_PAGE_URL = 'https://www.fox.com/'
@@ -68,9 +72,9 @@ class FOXIE(InfoExtractor):
'https://api3.fox.com/v2.0/' + path,
video_id, data=data, headers=headers)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
entitlement_issues = self._parse_json(
e.cause.read().decode(), video_id)['entitlementIssues']
e.cause.response.read().decode(), video_id)['entitlementIssues']
for e in entitlement_issues:
if e.get('errorCode') == 1005:
raise ExtractorError(
@@ -123,8 +127,8 @@ class FOXIE(InfoExtractor):
try:
m3u8_url = self._download_json(release_url, video_id)['playURL']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
error = self._parse_json(e.cause.read().decode(), video_id)
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
error = self._parse_json(e.cause.response.read().decode(), video_id)
if error.get('exception') == 'GeoLocationBlocked':
self.raise_geo_restricted(countries=['US'])
raise ExtractorError(error['description'], expected=True)

View File

@@ -1,6 +1,7 @@
from .common import InfoExtractor
from .uplynk import UplynkPreplayIE
from ..utils import HEADRequest, float_or_none, make_archive_id, smuggle_url
from ..networking import HEADRequest
from ..utils import float_or_none, make_archive_id, smuggle_url
class FoxSportsIE(InfoExtractor):
@@ -35,7 +36,7 @@ class FoxSportsIE(InfoExtractor):
'x-api-key': 'cf289e299efdfa39fb6316f259d1de93',
})
preplay_url = self._request_webpage(
HEADRequest(data['url']), video_id, 'Fetching preplay URL').geturl()
HEADRequest(data['url']), video_id, 'Fetching preplay URL').url
return {
'_type': 'url_transparent',
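
Two related renames appear here and throughout the following files: HEADRequest now lives in yt_dlp.networking, and response handles expose the resolved URL as the .url attribute rather than the urllib-style .geturl() method. A sketch, with `ie` standing in for an InfoExtractor:

from yt_dlp.networking import HEADRequest

def resolve_final_url(ie, url, video_id):
    # follow redirects with a HEAD request; .url is the final URL
    return ie._request_webpage(HEADRequest(url), video_id, 'Resolving final URL').url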

View File

@@ -1,5 +1,5 @@
from ..utils import HEADRequest
from .common import InfoExtractor
from ..networking import HEADRequest
class FujiTVFODPlus7IE(InfoExtractor):

View File

@@ -3,7 +3,7 @@ import re
import string
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
determine_ext,
@@ -46,8 +46,8 @@ class FunimationBaseIE(InfoExtractor):
}))
FunimationBaseIE._TOKEN = data['token']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
error = self._parse_json(e.cause.read().decode(), None)['error']
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
error = self._parse_json(e.cause.response.read().decode(), None)['error']
raise ExtractorError(error, expected=True)
raise

View File

@@ -60,6 +60,7 @@ class Funker530IE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
info = {}
rumble_url = list(RumbleEmbedIE._extract_embed_urls(url, webpage))
if rumble_url:
info = {'url': rumble_url[0], 'ie_key': RumbleEmbedIE.ie_key()}

View File

@@ -2,13 +2,8 @@ import re
from .common import InfoExtractor
from .kaltura import KalturaIE
from ..utils import (
HEADRequest,
remove_start,
sanitized_Request,
smuggle_url,
urlencode_postdata,
)
from ..networking import HEADRequest, Request
from ..utils import remove_start, smuggle_url, urlencode_postdata
class GDCVaultIE(InfoExtractor):
@@ -138,8 +133,8 @@ class GDCVaultIE(InfoExtractor):
'password': password,
}
request = sanitized_Request(login_url, urlencode_postdata(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
request = Request(login_url, urlencode_postdata(login_form))
request.headers['Content-Type'] = 'application/x-www-form-urlencoded'
self._download_webpage(request, display_id, 'Logging in')
start_page = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page')
self._download_webpage(logout_url, display_id, 'Logging out')
@@ -163,7 +158,7 @@ class GDCVaultIE(InfoExtractor):
video_url = 'http://www.gdcvault.com' + direct_url
# resolve the url so that we can detect the correct extension
video_url = self._request_webpage(
HEADRequest(video_url), video_id).geturl()
HEADRequest(video_url), video_id).url
return {
'id': video_id,

View File

@@ -58,6 +58,8 @@ class GenericIE(InfoExtractor):
'ext': 'mp4',
'title': 'trailer',
'upload_date': '20100513',
'direct': True,
'timestamp': 1273772943.0,
}
},
# Direct link to media delivered compressed (until Accept-Encoding is *)
@@ -101,6 +103,8 @@ class GenericIE(InfoExtractor):
'ext': 'webm',
'title': '5_Lennart_Poettering_-_Systemd',
'upload_date': '20141120',
'direct': True,
'timestamp': 1416498816.0,
},
'expected_warnings': [
'URL could be a direct video link, returning it as such.'
@@ -133,6 +137,7 @@ class GenericIE(InfoExtractor):
'upload_date': '20201204',
},
}],
'skip': 'Dead link',
},
# RSS feed with item with description and thumbnails
{
@@ -145,12 +150,12 @@ class GenericIE(InfoExtractor):
'playlist': [{
'info_dict': {
'ext': 'm4a',
'id': 'c1c879525ce2cb640b344507e682c36d',
'id': '818a5d38-01cd-152f-2231-ee479677fa82',
'title': 're:Hydrogen!',
'description': 're:.*In this episode we are going.*',
'timestamp': 1567977776,
'upload_date': '20190908',
'duration': 459,
'duration': 423,
'thumbnail': r're:^https?://.*\.jpg$',
'episode_number': 1,
'season_number': 1,
@@ -267,6 +272,7 @@ class GenericIE(InfoExtractor):
'params': {
'skip_download': True,
},
'skip': '404 Not Found',
},
# MPD from http://dash-mse-test.appspot.com/media.html
{
@@ -278,6 +284,7 @@ class GenericIE(InfoExtractor):
'title': 'car-20120827-manifest',
'formats': 'mincount:9',
'upload_date': '20130904',
'timestamp': 1378272859.0,
},
},
# m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
@@ -318,7 +325,7 @@ class GenericIE(InfoExtractor):
'id': 'cmQHVoWB5FY',
'ext': 'mp4',
'upload_date': '20130224',
'uploader_id': 'TheVerge',
'uploader_id': '@TheVerge',
'description': r're:^Chris Ziegler takes a look at the\.*',
'uploader': 'The Verge',
'title': 'First Firefox OS phones side-by-side',
@@ -2370,7 +2377,7 @@ class GenericIE(InfoExtractor):
'id': flashvars['video_id'],
'display_id': display_id,
'title': title,
'thumbnail': thumbnail,
'thumbnail': urljoin(url, thumbnail),
'formats': formats,
}
@@ -2431,7 +2438,7 @@ class GenericIE(InfoExtractor):
'Accept-Encoding': 'identity',
**smuggled_data.get('http_headers', {})
})
new_url = full_response.geturl()
new_url = full_response.url
url = urllib.parse.urlparse(url)._replace(scheme=urllib.parse.urlparse(new_url).scheme).geturl()
if new_url != extract_basic_auth(url)[0]:
self.report_following_redirect(new_url)
@@ -2529,12 +2536,12 @@ class GenericIE(InfoExtractor):
return self.playlist_result(
self._parse_xspf(
doc, video_id, xspf_url=url,
xspf_base_url=full_response.geturl()),
xspf_base_url=full_response.url),
video_id)
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
info_dict['formats'], info_dict['subtitles'] = self._parse_mpd_formats_and_subtitles(
doc,
mpd_base_url=full_response.geturl().rpartition('/')[0],
mpd_base_url=full_response.url.rpartition('/')[0],
mpd_url=url)
self._extra_manifest_info(info_dict, url)
self.report_detected('DASH manifest')
@@ -2562,7 +2569,7 @@ class GenericIE(InfoExtractor):
self._downloader.write_debug('Looking for embeds')
embeds = list(self._extract_embeds(original_url, webpage, urlh=full_response, info_dict=info_dict))
if len(embeds) == 1:
return {**info_dict, **embeds[0]}
return merge_dicts(embeds[0], info_dict)
elif embeds:
return self.playlist_result(embeds, **info_dict)
raise UnsupportedError(url)
@@ -2572,7 +2579,7 @@ class GenericIE(InfoExtractor):
info_dict = types.MappingProxyType(info_dict) # Prevents accidental mutation
video_id = traverse_obj(info_dict, 'display_id', 'id') or self._generic_id(url)
url, smuggled_data = unsmuggle_url(url, {})
actual_url = urlh.geturl() if urlh else url
actual_url = urlh.url if urlh else url
# Sometimes embedded video player is hidden behind percent encoding
# (e.g. https://github.com/ytdl-org/youtube-dl/issues/2448)
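
The switch from dict unpacking to merge_dicts(embeds[0], info_dict) above is deliberate: in yt_dlp.utils.merge_dicts, earlier dicts win, but a None value never overwrites a real value from a later dict (and empty strings can be filled in later), whereas plain {**info_dict, **embeds[0]} would let an embed's explicit None clobber a field the generic extractor already filled. Illustrative values only:

from yt_dlp.utils import merge_dicts

embed = {'title': 'Embed title', 'uploader': None}
info_dict = {'title': 'Page title', 'uploader': 'Page author'}

print({**info_dict, **embed})         # {'title': 'Embed title', 'uploader': None}
print(merge_dicts(embed, info_dict))  # {'title': 'Embed title', 'uploader': 'Page author'}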

View File

@@ -8,8 +8,8 @@ from .common import InfoExtractor
from ..compat import (
compat_str,
)
from ..networking import HEADRequest
from ..utils import (
HEADRequest,
ExtractorError,
float_or_none,
orderedSet,

View File

@@ -60,13 +60,13 @@ class GofileIE(InfoExtractor):
account_data = self._download_json(
'https://api.gofile.io/createAccount', None, note='Getting a new guest account')
self._TOKEN = account_data['data']['token']
self._set_cookie('gofile.io', 'accountToken', self._TOKEN)
self._set_cookie('.gofile.io', 'accountToken', self._TOKEN)
def _entries(self, file_id):
query_params = {
'contentId': file_id,
'token': self._TOKEN,
'websiteToken': 12345,
'websiteToken': '7fd94ds12fds4', # From https://gofile.io/dist/js/alljs.js
}
password = self.get_param('videopassword')
if password:

View File

@@ -5,7 +5,9 @@ from ..compat import compat_parse_qs
from ..utils import (
ExtractorError,
determine_ext,
extract_attributes,
get_element_by_class,
get_element_html_by_id,
int_or_none,
lowercase_escape,
try_get,
@@ -34,6 +36,7 @@ class GoogleDriveIE(InfoExtractor):
'ext': 'mp4',
'title': 'Big Buck Bunny.mp4',
'duration': 45,
'thumbnail': 'https://drive.google.com/thumbnail?id=0ByeS4oOUV-49Zzh4R1J6R09zazQ',
}
}, {
# video can't be watched anonymously due to view count limit reached,
@@ -207,10 +210,10 @@ class GoogleDriveIE(InfoExtractor):
'export': 'download',
})
def request_source_file(source_url, kind):
def request_source_file(source_url, kind, data=None):
return self._request_webpage(
source_url, video_id, note='Requesting %s file' % kind,
errnote='Unable to request %s file' % kind, fatal=False)
errnote='Unable to request %s file' % kind, fatal=False, data=data)
urlh = request_source_file(source_url, 'source')
if urlh:
def add_source_format(urlh):
@@ -225,7 +228,7 @@ class GoogleDriveIE(InfoExtractor):
# Using original URLs may result in redirect loop due to
# google.com's cookies mistakenly used for googleusercontent.com
# redirect URLs (see #23919).
'url': urlh.geturl(),
'url': urlh.url,
'ext': determine_ext(title, 'mp4').lower(),
'format_id': 'source',
'quality': 1,
@@ -237,14 +240,10 @@ class GoogleDriveIE(InfoExtractor):
urlh, url, video_id, note='Downloading confirmation page',
errnote='Unable to confirm download', fatal=False)
if confirmation_webpage:
confirm = self._search_regex(
r'confirm=([^&"\']+)', confirmation_webpage,
'confirmation code', default=None)
if confirm:
confirmed_source_url = update_url_query(source_url, {
'confirm': confirm,
})
urlh = request_source_file(confirmed_source_url, 'confirmed source')
confirmed_source_url = extract_attributes(
get_element_html_by_id('download-form', confirmation_webpage) or '').get('action')
if confirmed_source_url:
urlh = request_source_file(confirmed_source_url, 'confirmed source', data=b'')
if urlh and urlh.headers.get('Content-Disposition'):
add_source_format(urlh)
else:
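
The confirmation flow changes shape here: instead of scraping a confirm= token from the interstitial page, the extractor now pulls the action URL out of the download form and re-requests it with data=b'' so the request becomes a POST. A sketch against hypothetical interstitial HTML:

from yt_dlp.utils import extract_attributes, get_element_html_by_id

confirmation_webpage = '''
<form id="download-form" method="post"
      action="https://drive.usercontent.google.com/download?id=x&confirm=t">
  <input type="submit" value="Download anyway"/>
</form>'''

confirmed_source_url = extract_attributes(
    get_element_html_by_id('download-form', confirmation_webpage) or '').get('action')
print(confirmed_source_url)  # the form's action URL, ready to POST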

View File

@@ -383,9 +383,9 @@ class AwsIdp:
months = [None, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
time_now = datetime.datetime.utcnow()
time_now = datetime.datetime.now(datetime.timezone.utc)
format_string = "{} {} {} %H:%M:%S UTC %Y".format(days[time_now.weekday()], months[time_now.month], time_now.day)
time_string = datetime.datetime.utcnow().strftime(format_string)
time_string = time_now.strftime(format_string)
return time_string
def __str__(self):
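
datetime.datetime.utcnow() returns a naive datetime and is deprecated as of Python 3.12; the replacement above yields the same UTC wall-clock fields while carrying tzinfo, and reusing time_now for strftime avoids a second clock read. A quick illustration:

import datetime

time_now = datetime.datetime.now(datetime.timezone.utc)
print(time_now.tzinfo)                       # timezone.utc (aware)
print(time_now.strftime('%H:%M:%S UTC %Y'))  # same fields utcnow() produced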

View File

@@ -126,7 +126,7 @@ class HKETVIE(InfoExtractor):
# If we ever wanted to provide the final resolved URL that
# does not require cookies, albeit with a shorter lifespan:
# urlh = self._downloader.urlopen(file_url)
# resolved_url = urlh.geturl()
# resolved_url = urlh.url
label = fmt.get('label')
h = self._FORMAT_HEIGHTS.get(label)
w = h * width // height if h and width and height else None

View File

@@ -1,11 +1,7 @@
from .common import InfoExtractor
from ..compat import compat_b64decode
from ..utils import (
ExtractorError,
HEADRequest,
sanitized_Request,
urlencode_postdata,
)
from ..networking import HEADRequest, Request
from ..utils import ExtractorError, urlencode_postdata
class HotNewHipHopIE(InfoExtractor):
@@ -36,9 +32,9 @@ class HotNewHipHopIE(InfoExtractor):
('mediaType', 's'),
('mediaId', video_id),
])
r = sanitized_Request(
r = Request(
'http://www.hotnewhiphop.com/ajax/media/getActions/', data=reqdata)
r.add_header('Content-Type', 'application/x-www-form-urlencoded')
r.headers['Content-Type'] = 'application/x-www-form-urlencoded'
mkd = self._download_json(
r, video_id, note='Requesting media key',
errnote='Could not download media key')
@@ -50,7 +46,7 @@ class HotNewHipHopIE(InfoExtractor):
req = self._request_webpage(
redirect_req, video_id,
note='Resolving final URL', errnote='Could not resolve final URL')
video_url = req.geturl()
video_url = req.url
if video_url.endswith('.html'):
raise ExtractorError('Redirect failed')

View File

@@ -6,7 +6,8 @@ import time
import uuid
from .common import InfoExtractor
from ..compat import compat_HTTPError, compat_str
from ..compat import compat_str
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
determine_ext,
@@ -83,7 +84,7 @@ class HotStarIE(HotStarBaseIE):
_VALID_URL = r'''(?x)
https?://(?:www\.)?hotstar\.com(?:/in)?/(?!in/)
(?:
(?P<type>movies|sports|episode|(?P<tv>tv|shows))/
(?P<type>movies|sports|clips|episode|(?P<tv>tv|shows))/
(?(tv)(?:[^/?#]+/){2}|[^?#]*)
)?
[^/?#]+/
@@ -141,6 +142,51 @@ class HotStarIE(HotStarBaseIE):
'duration': 1272,
'channel_id': 3,
},
'skip': 'HTTP Error 504: Gateway Time-out', # XXX: Investigate 504 errors on some episodes
}, {
'url': 'https://www.hotstar.com/in/shows/kana-kaanum-kaalangal/1260097087/back-to-school/1260097320',
'info_dict': {
'id': '1260097320',
'ext': 'mp4',
'title': 'Back To School',
'season': 'Chapter 1',
'description': 'md5:b0d6a4c8a650681491e7405496fc7e13',
'timestamp': 1650564000,
'channel': 'Hotstar Specials',
'series': 'Kana Kaanum Kaalangal',
'season_number': 1,
'season_id': 9441,
'upload_date': '20220421',
'episode': 'Back To School',
'episode_number': 1,
'duration': 1810,
'channel_id': 54,
},
}, {
'url': 'https://www.hotstar.com/in/clips/e3-sairat-kahani-pyaar-ki/1000262286',
'info_dict': {
'id': '1000262286',
'ext': 'mp4',
'title': 'E3 - SaiRat, Kahani Pyaar Ki',
'description': 'md5:e3b4b3203bc0c5396fe7d0e4948a6385',
'episode': 'E3 - SaiRat, Kahani Pyaar Ki',
'upload_date': '20210606',
'timestamp': 1622943900,
'duration': 5395,
},
}, {
'url': 'https://www.hotstar.com/in/movies/premam/1000091195',
'info_dict': {
'id': '1000091195',
'ext': 'mp4',
'title': 'Premam',
'release_year': 2015,
'description': 'md5:d833c654e4187b5e34757eafb5b72d7f',
'timestamp': 1462149000,
'upload_date': '20160502',
'episode': 'Premam',
'duration': 8994,
},
}, {
'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157',
'only_matching': True,
@@ -159,6 +205,7 @@ class HotStarIE(HotStarBaseIE):
'episode': 'episode',
'tv': 'episode',
'shows': 'episode',
'clips': 'content',
None: 'content',
}
@@ -186,8 +233,10 @@ class HotStarIE(HotStarBaseIE):
video_type = self._TYPE.get(video_type, video_type)
cookies = self._get_cookies(url) # Cookies before any request
video_data = self._call_api_v1(f'{video_type}/detail', video_id,
query={'tas': 10000, 'contentId': video_id})['body']['results']['item']
video_data = traverse_obj(
self._call_api_v1(
f'{video_type}/detail', video_id, fatal=False, query={'tas': 10000, 'contentId': video_id}),
('body', 'results', 'item', {dict})) or {}
if not self.get_param('allow_unplayable_formats') and video_data.get('drmProtected'):
self.report_drm(video_id)
@@ -233,7 +282,7 @@ class HotStarIE(HotStarBaseIE):
'height': int_or_none(playback_set.get('height')),
}]
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
geo_restricted = True
continue
@@ -272,6 +321,7 @@ class HotStarIE(HotStarBaseIE):
'description': video_data.get('description'),
'duration': int_or_none(video_data.get('duration')),
'timestamp': int_or_none(traverse_obj(video_data, 'broadcastDate', 'startDate')),
'release_year': int_or_none(video_data.get('year')),
'formats': formats,
'subtitles': subs,
'channel': video_data.get('channelName'),
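
The detail request is now failure-tolerant: fatal=False lets the API call return None, and the traverse_obj path with a {dict} type filter collapses any partial or missing payload to {} instead of raising. Hypothetical payloads:

from yt_dlp.utils import traverse_obj

ok = {'body': {'results': {'item': {'title': 'Premam'}}}}
bad = None  # e.g. the API call failed with fatal=False

for response in (ok, bad):
    video_data = traverse_obj(response, ('body', 'results', 'item', {dict})) or {}
    print(video_data.get('title'))  # 'Premam', then None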

View File

@@ -1,13 +1,13 @@
import json
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..networking import Request
from ..networking.exceptions import HTTPError
from ..utils import (
clean_html,
ExtractorError,
int_or_none,
parse_age_limit,
sanitized_Request,
try_get,
)
@@ -42,7 +42,7 @@ class HRTiBaseIE(InfoExtractor):
'application_version': self._APP_VERSION
}
req = sanitized_Request(self._API_URL, data=json.dumps(app_data).encode('utf-8'))
req = Request(self._API_URL, data=json.dumps(app_data).encode('utf-8'), method='PUT')
resources = self._download_json(
@@ -73,8 +73,8 @@ class HRTiBaseIE(InfoExtractor):
self._login_url, None, note='Logging in', errnote='Unable to log in',
data=json.dumps(auth_data).encode('utf-8'))
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 406:
auth_info = self._parse_json(e.cause.read().encode('utf-8'), None)
if isinstance(e.cause, HTTPError) and e.cause.status == 406:
auth_info = self._parse_json(e.cause.response.read().encode('utf-8'), None)
else:
raise

View File

@@ -1,19 +1,32 @@
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
remove_end,
traverse_obj,
try_get,
unified_timestamp,
url_or_none,
urlencode_postdata,
)
class HungamaIE(InfoExtractor):
class HungamaBaseIE(InfoExtractor):
def _call_api(self, path, content_id, fatal=False):
return traverse_obj(self._download_json(
f'https://cpage.api.hungama.com/v2/page/content/{content_id}/{path}/detail',
content_id, fatal=fatal, query={
'device': 'web',
'platform': 'a',
'storeId': '1',
}), ('data', {dict})) or {}
class HungamaIE(HungamaBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:www\.)?hungama\.com/
(?:www\.|un\.)?hungama\.com/
(?:
(?:video|movie)/[^/]+/|
(?:video|movie|short-film)/[^/]+/|
tv-show/(?:[^/]+/){2}\d+/episode/[^/]+/
)
(?P<id>\d+)
@@ -25,13 +38,28 @@ class HungamaIE(InfoExtractor):
'id': '39349649',
'ext': 'mp4',
'title': 'Krishna Chants',
'description': 'Watch Krishna Chants video now. You can also watch other latest videos only at Hungama',
'description': ' ',
'upload_date': '20180829',
'duration': 264,
'timestamp': 1535500800,
'view_count': int,
'thumbnail': 'https://images.hungama.com/c/1/0dc/2ca/39349649/39349649_700x394.jpg',
}
'thumbnail': 'https://images1.hungama.com/tr:n-a_169_m/c/1/0dc/2ca/39349649/39349649_350x197.jpg?v=8',
'tags': 'count:6',
},
}, {
'url': 'https://un.hungama.com/short-film/adira/102524179/',
'md5': '2278463f5dc9db9054d0c02602d44666',
'info_dict': {
'id': '102524179',
'ext': 'mp4',
'title': 'Adira',
'description': 'md5:df20cd4d41eabb33634f06de1025a4b4',
'upload_date': '20230417',
'timestamp': 1681689600,
'view_count': int,
'thumbnail': 'https://images1.hungama.com/tr:n-a_23_m/c/1/197/ac9/102524179/102524179_350x525.jpg?v=1',
'tags': 'count:7',
},
}, {
'url': 'https://www.hungama.com/movie/kahaani-2/44129919/',
'only_matching': True,
@@ -51,14 +79,19 @@ class HungamaIE(InfoExtractor):
'c': 'common',
'm': 'get_video_mdn_url',
})
formats = self._extract_m3u8_formats(video_json['stream_url'], video_id, ext='mp4', m3u8_id='hls')
json_ld = self._search_json_ld(
self._download_webpage(url, video_id, fatal=False) or '', video_id, fatal=False)
metadata = self._call_api('movie', video_id)
return {
**json_ld,
**traverse_obj(metadata, ('head', 'data', {
'title': ('title', {str}),
'description': ('misc', 'description', {str}),
'duration': ('duration', {int}), # duration in JSON is incorrect if string
'timestamp': ('releasedate', {unified_timestamp}),
'view_count': ('misc', 'playcount', {int_or_none}),
'thumbnail': ('image', {url_or_none}),
'tags': ('misc', 'keywords', ..., {str}),
})),
'id': video_id,
'formats': formats,
'subtitles': {
@@ -71,10 +104,10 @@ class HungamaIE(InfoExtractor):
class HungamaSongIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?hungama\.com/song/[^/]+/(?P<id>\d+)'
_TEST = {
_VALID_URL = r'https?://(?:www\.|un\.)?hungama\.com/song/[^/]+/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.hungama.com/song/kitni-haseen-zindagi/2931166/',
'md5': 'd4a6a05a394ad0453a9bea3ca00e6024',
'md5': '964f46828e8b250aa35e5fdcfdcac367',
'info_dict': {
'id': '2931166',
'ext': 'mp3',
@@ -83,8 +116,22 @@ class HungamaSongIE(InfoExtractor):
'artist': 'Lucky Ali',
'album': None,
'release_year': 2000,
}
}
'thumbnail': 'https://stat2.hungama.ind.in/assets/images/default_images/da-200x200.png',
},
}, {
'url': 'https://un.hungama.com/song/tum-kya-mile-from-rocky-aur-rani-kii-prem-kahaani/103553672',
'md5': '964f46828e8b250aa35e5fdcfdcac367',
'info_dict': {
'id': '103553672',
'ext': 'mp3',
'title': 'md5:5ebeb1e10771b634ce5f700ce68ae5f4',
'track': 'Tum Kya Mile (From "Rocky Aur Rani Kii Prem Kahaani")',
'artist': 'Pritam Chakraborty, Arijit Singh, Shreya Ghoshal, Amitabh Bhattacharya',
'album': 'Tum Kya Mile (From "Rocky Aur Rani Kii Prem Kahaani")',
'release_year': 2023,
'thumbnail': 'https://images.hungama.com/c/1/7c2/c7b/103553671/103553671_200x200.jpg',
},
}]
def _real_extract(self, url):
audio_id = self._match_id(url)
@@ -122,8 +169,8 @@ class HungamaSongIE(InfoExtractor):
}
class HungamaAlbumPlaylistIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?hungama\.com/(?:playlists|album)/[^/]+/(?P<id>\d+)'
class HungamaAlbumPlaylistIE(HungamaBaseIE):
_VALID_URL = r'https?://(?:www\.|un\.)?hungama\.com/(?P<path>playlists|album)/[^/]+/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.hungama.com/album/bhuj-the-pride-of-india/69481490/',
'playlist_mincount': 7,
@@ -132,16 +179,24 @@ class HungamaAlbumPlaylistIE(InfoExtractor):
},
}, {
'url': 'https://www.hungama.com/playlists/hindi-jan-to-june-2021/123063/',
'playlist_mincount': 50,
'playlist_mincount': 33,
'info_dict': {
'id': '123063',
},
}, {
'url': 'https://un.hungama.com/album/what-jhumka-%3F-from-rocky-aur-rani-kii-prem-kahaani/103891805/',
'playlist_mincount': 1,
'info_dict': {
'id': '103891805',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
ptrn = r'<meta[^>]+?property=[\"\']?music:song:url[\"\']?[^>]+?content=[\"\']?([^\"\']+)'
items = re.findall(ptrn, webpage)
entries = [self.url_result(item, ie=HungamaSongIE.ie_key()) for item in items]
return self.playlist_result(entries, video_id)
playlist_id, path = self._match_valid_url(url).group('id', 'path')
data = self._call_api(remove_end(path, 's'), playlist_id, fatal=True)
def entries():
for song_url in traverse_obj(data, ('body', 'rows', ..., 'data', 'misc', 'share', {url_or_none})):
yield self.url_result(song_url, HungamaSongIE)
return self.playlist_result(entries(), playlist_id)
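
The playlist extractor now walks the JSON API instead of scraping meta tags: a generator lazily yields url_result entries and playlist_result consumes it, with the {url_or_none} filter dropping rows without a usable share URL. The shape of that pattern, with made-up data and `ie` as a stand-in extractor:

from yt_dlp.utils import traverse_obj, url_or_none

data = {'body': {'rows': [
    {'data': {'misc': {'share': 'https://un.hungama.com/song/x/1'}}},
    {'data': {'misc': {'share': None}}},  # dropped by the url_or_none filter
]}}

def entries(ie, data):
    for song_url in traverse_obj(data, ('body', 'rows', ..., 'data', 'misc', 'share', {url_or_none})):
        yield ie.url_result(song_url)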

View File

@@ -1,8 +1,9 @@
import re
import urllib.error
import urllib.parse
from .common import InfoExtractor
from ..compat import compat_parse_qs
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
determine_ext,
@@ -27,9 +28,9 @@ class IGNBaseIE(InfoExtractor):
try:
return self._call_api(slug)
except ExtractorError as e:
if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 404:
if isinstance(e.cause, HTTPError) and e.cause.status == 404:
e.cause.args = e.cause.args or [
e.cause.geturl(), e.cause.getcode(), e.cause.reason]
e.cause.response.url, e.cause.status, e.cause.reason]
raise ExtractorError(
'Content not found: expired?', cause=e.cause,
expected=True)
@@ -196,10 +197,6 @@ class IGNVideoIE(IGNBaseIE):
'thumbnail': 'https://sm.ign.com/ign_me/video/h/how-hitman/how-hitman-aims-to-be-different-than-every-other-s_8z14.jpg',
'duration': 298,
'tags': 'count:13',
'display_id': '112203',
'thumbnail': 'https://sm.ign.com/ign_me/video/h/how-hitman/how-hitman-aims-to-be-different-than-every-other-s_8z14.jpg',
'duration': 298,
'tags': 'count:13',
},
'expected_warnings': ['HTTP Error 400: Bad Request'],
}, {
@@ -226,7 +223,7 @@ class IGNVideoIE(IGNBaseIE):
parsed_url._replace(path=parsed_url.path.rsplit('/', 1)[0] + '/embed'))
webpage, urlh = self._download_webpage_handle(embed_url, video_id)
new_url = urlh.geturl()
new_url = urlh.url
ign_url = compat_parse_qs(
urllib.parse.urlparse(new_url).query).get('url', [None])[-1]
if ign_url:
@@ -323,14 +320,14 @@ class IGNArticleIE(IGNBaseIE):
try:
return self._call_api(slug)
except ExtractorError as e:
if isinstance(e.cause, urllib.error.HTTPError):
if isinstance(e.cause, HTTPError):
e.cause.args = e.cause.args or [
e.cause.geturl(), e.cause.getcode(), e.cause.reason]
if e.cause.code == 404:
e.cause.response.url, e.cause.status, e.cause.reason]
if e.cause.status == 404:
raise ExtractorError(
'Content not found: expired?', cause=e.cause,
expected=True)
elif e.cause.code == 503:
elif e.cause.status == 503:
self.report_warning(error_to_compat_str(e.cause))
return
raise

View File

@@ -1,7 +1,7 @@
import json
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
int_or_none,
@@ -52,9 +52,9 @@ class ImgGamingBaseIE(InfoExtractor):
return self._call_api(
stream_path, media_id)['playerUrlCallback']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
raise ExtractorError(
self._parse_json(e.cause.read().decode(), media_id)['messages'][0],
self._parse_json(e.cause.response.read().decode(), media_id)['messages'][0],
expected=True)
raise

View File

@@ -1,9 +1,9 @@
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
parse_age_limit,
parse_iso8601,
time_seconds,
update_url_query,
)
@@ -11,15 +11,14 @@ from ..utils import (
class IndavideoEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:embed\.)?indavideo\.hu/player/video/|assets\.indavideo\.hu/swf/player\.swf\?.*\b(?:v(?:ID|id))=)(?P<id>[\da-f]+)'
# Some example URLs covered by generic extractor:
# http://indavideo.hu/video/Vicces_cica_1
# http://index.indavideo.hu/video/2015_0728_beregszasz
# http://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko
# http://erotika.indavideo.hu/video/Amator_tini_punci
# http://film.indavideo.hu/video/f_hrom_nagymamm_volt
# http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes
_EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//embed\.indavideo\.hu/player/video/[\da-f]+)']
# https://indavideo.hu/video/Vicces_cica_1
# https://index.indavideo.hu/video/Hod_Nemetorszagban
# https://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko
# https://film.indavideo.hu/video/f_farkaslesen
# https://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes
_EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)//embed\.indavideo\.hu/player/video/[\da-f]+)']
_TESTS = [{
'url': 'http://indavideo.hu/player/video/1bdc3c6d80/',
'url': 'https://indavideo.hu/player/video/1bdc3c6d80/',
'md5': 'c8a507a1c7410685f83a06eaeeaafeab',
'info_dict': {
'id': '1837039',
@@ -36,21 +35,33 @@ class IndavideoEmbedIE(InfoExtractor):
'tags': ['tánc', 'cica', 'cuki', 'cukiajanlo', 'newsroom'],
},
}, {
'url': 'http://embed.indavideo.hu/player/video/1bdc3c6d80?autostart=1&hide=1',
'only_matching': True,
}, {
'url': 'http://assets.indavideo.hu/swf/player.swf?v=fe25e500&vID=1bdc3c6d80&autostart=1&hide=1&i=1',
'url': 'https://embed.indavideo.hu/player/video/1bdc3c6d80?autostart=1&hide=1',
'only_matching': True,
}]
_WEBPAGE_TESTS = [{
'url': 'https://indavideo.hu/video/Vicces_cica_1',
'info_dict': {
'id': '1335611',
'ext': 'mp4',
'title': 'Vicces cica',
'description': 'Játszik a tablettel. :D',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Jet_Pack',
'uploader_id': '491217',
'timestamp': 1390821212,
'upload_date': '20140127',
'duration': 7,
'age_limit': 0,
'tags': ['cica', 'Jet_Pack'],
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
video = self._download_json(
'https://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/%s' % video_id,
video_id)['data']
title = video['title']
f'https://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/{video_id}/',
video_id, query={'_': time_seconds()})['data']
video_urls = []
@@ -60,33 +71,21 @@ class IndavideoEmbedIE(InfoExtractor):
elif isinstance(video_files, dict):
video_urls.extend(video_files.values())
video_file = video.get('video_file')
if video_file:
video_urls.append(video_file)
video_urls = list(set(video_urls))
video_prefix = video_urls[0].rsplit('/', 1)[0]
for flv_file in video.get('flv_files', []):
flv_url = '%s/%s' % (video_prefix, flv_file)
if flv_url not in video_urls:
video_urls.append(flv_url)
filesh = video.get('filesh')
filesh = video.get('filesh') or {}
formats = []
for video_url in video_urls:
height = int_or_none(self._search_regex(
r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', default=None))
if filesh:
if not height:
continue
token = filesh.get(compat_str(height))
if token is None:
continue
video_url = update_url_query(video_url, {'token': token})
if not height and len(filesh) == 1:
height = int_or_none(list(filesh.keys())[0])
token = filesh.get(str(height))
if token is None:
continue
formats.append({
'url': video_url,
'url': update_url_query(video_url, {'token': token}),
'height': height,
})
@@ -103,7 +102,7 @@ class IndavideoEmbedIE(InfoExtractor):
return {
'id': video.get('id') or video_id,
'title': title,
'title': video.get('title'),
'description': video.get('description'),
'thumbnails': thumbnails,
'uploader': video.get('user_name'),
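
The reworked token lookup above boils down to: every format URL must be signed with a token keyed by height in filesh, and when the URL carries no height, a lone filesh entry supplies it. A sketch with invented values:

from yt_dlp.utils import int_or_none, update_url_query

filesh = {'360': 'tok360'}
video_url, height = 'https://example.com/video.mp4', None

if not height and len(filesh) == 1:
    height = int_or_none(list(filesh)[0])
token = filesh.get(str(height))
if token is not None:
    print(update_url_query(video_url, {'token': token}), height)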

View File

@@ -3,9 +3,9 @@ import itertools
import json
import re
import time
import urllib.error
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
decode_base_n,
@@ -442,7 +442,7 @@ class InstagramIE(InstagramBaseIE):
shared_data = self._search_json(
r'window\._sharedData\s*=', webpage, 'shared data', video_id, fatal=False) or {}
if shared_data and self._LOGIN_URL not in urlh.geturl():
if shared_data and self._LOGIN_URL not in urlh.url:
media.update(traverse_obj(
shared_data, ('entry_data', 'PostPage', 0, 'graphql', 'shortcode_media'),
('entry_data', 'PostPage', 0, 'media'), expected_type=dict) or {})
@@ -589,7 +589,7 @@ class InstagramPlaylistBaseIE(InstagramBaseIE):
except ExtractorError as e:
# if it's an error caused by a bad query, and there are
# more GIS templates to try, ignore it and keep trying
if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 403:
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
if gis_tmpl != gis_tmpls[-1]:
continue
raise

View File

@@ -81,7 +81,7 @@ class IPrimaIE(InfoExtractor):
note='Logging in')
# a profile may need to be selected first, even when there is only a single one
if '/profile-select' in login_handle.geturl():
if '/profile-select' in login_handle.url:
profile_id = self._search_regex(
r'data-identifier\s*=\s*["\']?(\w+)', profile_select_html, 'profile id')
@@ -89,7 +89,7 @@ class IPrimaIE(InfoExtractor):
f'{self._AUTH_ROOT}/user/profile-select-perform/{profile_id}', None,
query={'continueUrl': '/user/login?redirect_uri=/user/'}, note='Selecting profile')
code = traverse_obj(login_handle.geturl(), ({parse_qs}, 'code', 0))
code = traverse_obj(login_handle.url, ({parse_qs}, 'code', 0))
if not code:
raise ExtractorError('Login failed', expected=True)
@@ -134,10 +134,17 @@ class IPrimaIE(InfoExtractor):
), webpage, 'real id', group='id', default=None)
if not video_id:
nuxt_data = self._search_nuxt_data(webpage, video_id, traverse='data')
nuxt_data = self._search_nuxt_data(webpage, video_id, traverse='data', fatal=False)
video_id = traverse_obj(
nuxt_data, (..., 'content', 'additionals', 'videoPlayId', {str}), get_all=False)
if not video_id:
nuxt_data = self._search_json(
r'<script[^>]+\bid=["\']__NUXT_DATA__["\'][^>]*>',
webpage, 'nuxt data', None, end_pattern=r'</script>', contains_pattern=r'\[(?s:.+)\]')
video_id = traverse_obj(nuxt_data, lambda _, v: re.fullmatch(r'p\d+', v), get_all=False)
if not video_id:
self.raise_no_formats('Unable to extract video ID from webpage')

View File

@@ -499,9 +499,10 @@ class IqIE(InfoExtractor):
'tm': tm,
'qdy': 'a',
'qds': 0,
'k_ft1': 141287244169348,
'k_ft4': 34359746564,
'k_ft5': 1,
'k_ft1': '143486267424900',
'k_ft4': '1572868',
'k_ft7': '4',
'k_ft5': '1',
'bop': JSON.stringify({
'version': '10.0',
'dfp': dfp
@@ -527,16 +528,24 @@ class IqIE(InfoExtractor):
if player_js_cache:
return player_js_cache
webpack_js_url = self._proto_relative_url(self._search_regex(
r'<script src="((?:https?)?//stc.iqiyipic.com/_next/static/chunks/webpack-\w+\.js)"', webpage, 'webpack URL'))
r'<script src="((?:https?:)?//stc\.iqiyipic\.com/_next/static/chunks/webpack-\w+\.js)"', webpage, 'webpack URL'))
webpack_js = self._download_webpage(webpack_js_url, video_id, note='Downloading webpack JS', errnote='Unable to download webpack JS')
webpack_map = self._search_json(
r'["\']\s*\+\s*', webpack_js, 'JS locations', video_id,
contains_pattern=r'{\s*(?:\d+\s*:\s*["\'][\da-f]+["\']\s*,?\s*)+}',
end_pattern=r'\[\w+\]\+["\']\.js', transform_source=js_to_json)
replacement_map = self._search_json(
r'["\']\s*\+\(\s*', webpack_js, 'replacement map', video_id,
contains_pattern=r'{\s*(?:\d+\s*:\s*["\'][\w.-]+["\']\s*,?\s*)+}',
end_pattern=r'\[\w+\]\|\|\w+\)\+["\']\.', transform_source=js_to_json,
fatal=False) or {}
for module_index in reversed(webpack_map):
real_module = replacement_map.get(module_index) or module_index
module_js = self._download_webpage(
f'https://stc.iqiyipic.com/_next/static/chunks/{module_index}.{webpack_map[module_index]}.js',
f'https://stc.iqiyipic.com/_next/static/chunks/{real_module}.{webpack_map[module_index]}.js',
video_id, note=f'Downloading #{module_index} module JS', errnote='Unable to download module JS', fatal=False) or ''
if 'vms request' in module_js:
self.cache.store('iq', 'player_js', module_js)
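
The two JSON maps mined from webpack-*.js combine into chunk URLs like so (indices and hashes invented): webpack_map holds a content hash per module index, and the optional replacement map renames some indices before the filename is built.

webpack_map = {'123': 'abc123def', '456': '0f0f0f0f0'}
replacement_map = {'456': 'vendors-node_modules'}

for module_index in reversed(webpack_map):  # dicts are reversible on py3.8+
    real_module = replacement_map.get(module_index) or module_index
    print(f'https://stc.iqiyipic.com/_next/static/chunks/{real_module}.{webpack_map[module_index]}.js')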

View File

@@ -1,5 +1,5 @@
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
int_or_none,
@@ -101,8 +101,8 @@ class KakaoIE(InfoExtractor):
cdn_api_base, video_id, query=query,
note='Downloading video URL for profile %s' % profile_name)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
resp = self._parse_json(e.cause.read().decode(), video_id)
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
resp = self._parse_json(e.cause.response.read().decode(), video_id)
if resp.get('code') == 'GeoBlocked':
self.raise_geo_restricted()
raise

View File

@@ -1,7 +1,6 @@
from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import (
HEADRequest,
UserNotLive,
float_or_none,
merge_dicts,
@@ -30,7 +29,7 @@ class KickBaseIE(InfoExtractor):
class KickIE(KickBaseIE):
_VALID_URL = r'https?://(?:www\.)?kick\.com/(?!(?:video|categories|search|auth)(?:[/?#]|$))(?P<id>[\w_]+)'
_VALID_URL = r'https?://(?:www\.)?kick\.com/(?!(?:video|categories|search|auth)(?:[/?#]|$))(?P<id>[\w-]+)'
_TESTS = [{
'url': 'https://kick.com/yuppy',
'info_dict': {

View File

@@ -91,7 +91,7 @@ class KuwoIE(KuwoBaseIE):
webpage, urlh = self._download_webpage_handle(
url, song_id, note='Download song detail info',
errnote='Unable to get song detail info')
if song_id not in urlh.geturl() or '对不起,该歌曲由于版权问题已被下线,将返回网站首页' in webpage:
if song_id not in urlh.url or '对不起,该歌曲由于版权问题已被下线,将返回网站首页' in webpage:
raise ExtractorError('this song has been offline because of copyright issues', expected=True)
song_name = self._html_search_regex(

View File

@@ -1,13 +1,8 @@
import re
from .common import InfoExtractor
from ..utils import (
float_or_none,
HEADRequest,
int_or_none,
parse_duration,
unified_strdate,
)
from ..networking import HEADRequest
from ..utils import float_or_none, int_or_none, parse_duration, unified_strdate
class LA7IE(InfoExtractor):

View File

@@ -1,11 +1,12 @@
import functools
import json
import re
import urllib.parse
from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import (
ExtractorError,
HEADRequest,
OnDemandPagedList,
UnsupportedError,
determine_ext,
@@ -21,10 +22,11 @@ from ..utils import (
class LBRYBaseIE(InfoExtractor):
_BASE_URL_REGEX = r'(?:https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/|lbry://)'
_BASE_URL_REGEX = r'(?x)(?:https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/|lbry://)'
_CLAIM_ID_REGEX = r'[0-9a-f]{1,40}'
_OPT_CLAIM_ID = '[^:/?#&]+(?:[:#]%s)?' % _CLAIM_ID_REGEX
_OPT_CLAIM_ID = '[^$@:/?#&]+(?:[:#]%s)?' % _CLAIM_ID_REGEX
_SUPPORTED_STREAM_TYPES = ['video', 'audio']
_PAGE_SIZE = 50
def _call_api_proxy(self, method, display_id, params, resource):
headers = {'Content-Type': 'application/json-rpc'}
@@ -68,22 +70,82 @@ class LBRYBaseIE(InfoExtractor):
'duration': ('value', stream_type, 'duration', {int_or_none}),
'channel': ('signing_channel', 'value', 'title', {str}),
'channel_id': ('signing_channel', 'claim_id', {str}),
'uploader_id': ('signing_channel', 'name', {str}),
})
channel_name = traverse_obj(stream, ('signing_channel', 'name', {str}))
if channel_name and info.get('channel_id'):
info['channel_url'] = self._permanent_url(url, channel_name, info['channel_id'])
if info.get('uploader_id') and info.get('channel_id'):
info['channel_url'] = self._permanent_url(url, info['uploader_id'], info['channel_id'])
return info
def _fetch_page(self, display_id, url, params, page):
page += 1
page_params = {
'no_totals': True,
'page': page,
'page_size': self._PAGE_SIZE,
**params,
}
result = self._call_api_proxy(
'claim_search', display_id, page_params, f'page {page}')
for item in traverse_obj(result, ('items', lambda _, v: v['name'] and v['claim_id'])):
yield {
**self._parse_stream(item, url),
'_type': 'url',
'id': item['claim_id'],
'url': self._permanent_url(url, item['name'], item['claim_id']),
}
def _playlist_entries(self, url, display_id, claim_param, metadata):
qs = parse_qs(url)
content = qs.get('content', [None])[0]
params = {
'fee_amount': qs.get('fee_amount', ['>=0'])[0],
'order_by': {
'new': ['release_time'],
'top': ['effective_amount'],
'trending': ['trending_group', 'trending_mixed'],
}[qs.get('order', ['new'])[0]],
'claim_type': 'stream',
'stream_types': [content] if content in ['audio', 'video'] else self._SUPPORTED_STREAM_TYPES,
**claim_param,
}
duration = qs.get('duration', [None])[0]
if duration:
params['duration'] = {
'long': '>=1200',
'short': '<=240',
}[duration]
language = qs.get('language', ['all'])[0]
if language != 'all':
languages = [language]
if language == 'en':
languages.append('none')
params['any_languages'] = languages
entries = OnDemandPagedList(
functools.partial(self._fetch_page, display_id, url, params),
self._PAGE_SIZE)
return self.playlist_result(
entries, display_id, **traverse_obj(metadata, ('value', {
'title': 'title',
'description': 'description',
})))
class LBRYIE(LBRYBaseIE):
IE_NAME = 'lbry'
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P<id>\$/[^/]+/[^/]+/{1}|@{0}/{0}|(?!@){0})'.format(LBRYBaseIE._OPT_CLAIM_ID, LBRYBaseIE._CLAIM_ID_REGEX)
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + rf'''
(?:\$/(?:download|embed)/)?
(?P<id>
[^$@:/?#]+/{LBRYBaseIE._CLAIM_ID_REGEX}
|(?:@{LBRYBaseIE._OPT_CLAIM_ID}/)?{LBRYBaseIE._OPT_CLAIM_ID}
)'''
_TESTS = [{
# Video
'url': 'https://lbry.tv/@Mantega:1/First-day-LBRY:1',
'md5': 'fffd15d76062e9a985c22c7c7f2f4805',
'md5': '65bd7ec1f6744ada55da8e4c48a2edf9',
'info_dict': {
'id': '17f983b61f53091fb8ea58a9c56804e4ff8cff4d',
'ext': 'mp4',
@@ -97,6 +159,7 @@ class LBRYIE(LBRYBaseIE):
'height': 720,
'thumbnail': 'https://spee.ch/7/67f2d809c263288c.png',
'license': 'None',
'uploader_id': '@Mantega',
'duration': 346,
'channel': 'LBRY/Odysee rats united!!!',
'channel_id': '1c8ad6a2ab4e889a71146ae4deeb23bb92dab627',
@@ -130,11 +193,11 @@ class LBRYIE(LBRYBaseIE):
'vcodec': 'none',
'thumbnail': 'https://spee.ch/d/0bc63b0e6bf1492d.png',
'license': 'None',
'uploader_id': '@LBRYFoundation',
}
}, {
# HLS
'url': 'https://odysee.com/@gardeningincanada:b/plants-i-will-never-grow-again.-the:e',
'md5': '25049011f3c8bc2f8b60ad88a031837e',
'md5': 'c35fac796f62a14274b4dc2addb5d0ba',
'info_dict': {
'id': 'e51671357333fe22ae88aad320bde2f6f96b1410',
'ext': 'mp4',
@@ -149,6 +212,7 @@ class LBRYIE(LBRYBaseIE):
'channel': 'Gardening In Canada',
'channel_id': 'b8be0e93b423dad221abe29545fbe8ec36e806bc',
'channel_url': 'https://odysee.com/@gardeningincanada:b8be0e93b423dad221abe29545fbe8ec36e806bc',
'uploader_id': '@gardeningincanada',
'formats': 'mincount:3',
'thumbnail': 'https://thumbnails.lbry.com/AgHSc_HzrrE',
'license': 'Copyrighted (contact publisher)',
@@ -174,6 +238,7 @@ class LBRYIE(LBRYBaseIE):
'formats': 'mincount:1',
'thumbnail': 'startswith:https://thumb',
'license': 'None',
'uploader_id': '@RT',
},
'params': {'skip_download': True}
}, {
@@ -184,12 +249,13 @@ class LBRYIE(LBRYBaseIE):
'id': '41fbfe805eb73c8d3012c0c49faa0f563274f634',
'ext': 'mp4',
'title': 'Biotechnological Invasion of Skin (April 2023)',
'description': 'md5:709a2f4c07bd8891cda3a7cc2d6fcf5c',
'description': 'md5:fe28689db2cb7ba3436d819ac3ffc378',
'channel': 'Wicked Truths',
'channel_id': '23d2bbf856b0ceed5b1d7c5960bcc72da5a20cb0',
'channel_url': 'https://odysee.com/@wickedtruths:23d2bbf856b0ceed5b1d7c5960bcc72da5a20cb0',
'timestamp': 1685790036,
'upload_date': '20230603',
'uploader_id': '@wickedtruths',
'timestamp': 1695114347,
'upload_date': '20230919',
'release_timestamp': 1685617473,
'release_date': '20230601',
'duration': 1063,
@@ -229,10 +295,10 @@ class LBRYIE(LBRYBaseIE):
def _real_extract(self, url):
display_id = self._match_id(url)
if display_id.startswith('$/'):
display_id = display_id.split('/', 2)[-1].replace('/', ':')
else:
if display_id.startswith('@'):
display_id = display_id.replace(':', '#')
else:
display_id = display_id.replace('/', ':')
display_id = urllib.parse.unquote(display_id)
uri = 'lbry://' + display_id
result = self._resolve_url(uri, display_id, 'stream')
@@ -246,12 +312,13 @@ class LBRYIE(LBRYBaseIE):
streaming_url = self._call_api_proxy(
'get', claim_id, {'uri': uri}, 'streaming url')['streaming_url']
# GET request returns original video/audio file if available
ext = urlhandle_detect_ext(self._request_webpage(
streaming_url, display_id, 'Checking for original quality', headers=headers))
if ext != 'm3u8':
# GET request to v3 API returns original video/audio file if available
direct_url = re.sub(r'/api/v\d+/', '/api/v3/', streaming_url)
urlh = self._request_webpage(
direct_url, display_id, 'Checking for original quality', headers=headers, fatal=False)
if urlh and urlhandle_detect_ext(urlh) != 'm3u8':
formats.append({
'url': streaming_url,
'url': direct_url,
'format_id': 'original',
'quality': 1,
**traverse_obj(result, ('value', {
@@ -266,7 +333,7 @@ class LBRYIE(LBRYBaseIE):
# HEAD request returns redirect response to m3u8 URL if available
final_url = self._request_webpage(
HEADRequest(streaming_url), display_id, headers=headers,
note='Downloading streaming redirect url info').geturl()
note='Downloading streaming redirect url info').url
elif result.get('value_type') == 'stream':
claim_id, is_live = result['signing_channel']['claim_id'], True
@@ -298,7 +365,7 @@ class LBRYIE(LBRYBaseIE):
class LBRYChannelIE(LBRYBaseIE):
IE_NAME = 'lbry:channel'
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P<id>@%s)/?(?:[?&]|$)' % LBRYBaseIE._OPT_CLAIM_ID
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + rf'(?P<id>@{LBRYBaseIE._OPT_CLAIM_ID})/?(?:[?&]|$)'
_TESTS = [{
'url': 'https://lbry.tv/@LBRYFoundation:0',
'info_dict': {
@@ -314,65 +381,50 @@ class LBRYChannelIE(LBRYBaseIE):
'url': 'lbry://@lbry#3f',
'only_matching': True,
}]
_PAGE_SIZE = 50
def _fetch_page(self, claim_id, url, params, page):
page += 1
page_params = {
'channel_ids': [claim_id],
'claim_type': 'stream',
'no_totals': True,
'page': page,
'page_size': self._PAGE_SIZE,
}
page_params.update(params)
result = self._call_api_proxy(
'claim_search', claim_id, page_params, 'page %d' % page)
for item in (result.get('items') or []):
stream_claim_name = item.get('name')
stream_claim_id = item.get('claim_id')
if not (stream_claim_name and stream_claim_id):
continue
yield {
**self._parse_stream(item, url),
'_type': 'url',
'id': stream_claim_id,
'url': self._permanent_url(url, stream_claim_name, stream_claim_id),
}
def _real_extract(self, url):
display_id = self._match_id(url).replace(':', '#')
result = self._resolve_url(
'lbry://' + display_id, display_id, 'channel')
result = self._resolve_url(f'lbry://{display_id}', display_id, 'channel')
claim_id = result['claim_id']
qs = parse_qs(url)
content = qs.get('content', [None])[0]
params = {
'fee_amount': qs.get('fee_amount', ['>=0'])[0],
'order_by': {
'new': ['release_time'],
'top': ['effective_amount'],
'trending': ['trending_group', 'trending_mixed'],
}[qs.get('order', ['new'])[0]],
'stream_types': [content] if content in ['audio', 'video'] else self._SUPPORTED_STREAM_TYPES,
}
duration = qs.get('duration', [None])[0]
if duration:
params['duration'] = {
'long': '>=1200',
'short': '<=240',
}[duration]
language = qs.get('language', ['all'])[0]
if language != 'all':
languages = [language]
if language == 'en':
languages.append('none')
params['any_languages'] = languages
entries = OnDemandPagedList(
functools.partial(self._fetch_page, claim_id, url, params),
self._PAGE_SIZE)
result_value = result.get('value') or {}
return self.playlist_result(
entries, claim_id, result_value.get('title'),
result_value.get('description'))
return self._playlist_entries(url, claim_id, {'channel_ids': [claim_id]}, result)
class LBRYPlaylistIE(LBRYBaseIE):
IE_NAME = 'lbry:playlist'
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'\$/(?:play)?list/(?P<id>[0-9a-f-]+)'
_TESTS = [{
'url': 'https://odysee.com/$/playlist/ffef782f27486f0ac138bde8777f72ebdd0548c2',
'info_dict': {
'id': 'ffef782f27486f0ac138bde8777f72ebdd0548c2',
'title': 'Théâtre Classique',
'description': 'Théâtre Classique',
},
'playlist_mincount': 4,
}, {
'url': 'https://odysee.com/$/list/9c6658b3dd21e4f2a0602d523a13150e2b48b770',
'info_dict': {
'id': '9c6658b3dd21e4f2a0602d523a13150e2b48b770',
'title': 'Social Media Exposed',
'description': 'md5:98af97317aacd5b85d595775ea37d80e',
},
'playlist_mincount': 34,
}, {
'url': 'https://odysee.com/$/playlist/938fb11d-215f-4d1c-ad64-723954df2184',
'info_dict': {
'id': '938fb11d-215f-4d1c-ad64-723954df2184',
},
'playlist_mincount': 1000,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
result = traverse_obj(self._call_api_proxy('claim_search', display_id, {
'claim_ids': [display_id],
'no_totals': True,
'page': 1,
'page_size': self._PAGE_SIZE,
}, 'playlist'), ('items', 0))
claim_param = {'claim_ids': traverse_obj(result, ('value', 'claims', ..., {str}))}
return self._playlist_entries(url, display_id, claim_param, result)
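
Channel and playlist extraction now share one pager: OnDemandPagedList invokes the fetch callable with a 0-based page number, and only for pages actually requested, which is why _fetch_page starts with page += 1 for the 1-based API. A toy pager with fake items:

import functools
from yt_dlp.utils import OnDemandPagedList

PAGE_SIZE = 50

def fetch_page(params, page):
    page += 1  # the claim_search API counts pages from 1
    # a real implementation would call the API with {**params, 'page': page}
    yield from (f'item-{page}-{n}' for n in range(3))

entries = OnDemandPagedList(functools.partial(fetch_page, {'claim_type': 'stream'}), PAGE_SIZE)
print(entries.getslice(0, 3))  # ['item-1-0', 'item-1-1', 'item-1-2']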

View File

@@ -25,7 +25,7 @@ class LecturioBaseIE(InfoExtractor):
self._LOGIN_URL, None, 'Downloading login popup')
def is_logged(url_handle):
return self._LOGIN_URL not in url_handle.geturl()
return self._LOGIN_URL not in url_handle.url
# Already logged in
if is_logged(urlh):
@@ -57,8 +57,8 @@ class LecturioIE(LecturioBaseIE):
_VALID_URL = r'''(?x)
https://
(?:
app\.lecturio\.com/([^/]+/(?P<nt>[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P<id>\d+))|
(?:www\.)?lecturio\.de/[^/]+/(?P<nt_de>[^/?#&]+)\.vortrag
app\.lecturio\.com/([^/?#]+/(?P<nt>[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P<id>\d+))|
(?:www\.)?lecturio\.de/(?:[^/?#]+/)+(?P<nt_de>[^/?#&]+)\.vortrag
)
'''
_TESTS = [{
@@ -73,6 +73,9 @@ class LecturioIE(LecturioBaseIE):
}, {
'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag',
'only_matching': True,
}, {
'url': 'https://www.lecturio.de/jura/oeffentliches-recht-at-1-staatsexamen/oeffentliches-recht-staatsexamen.vortrag',
'only_matching': True,
}, {
'url': 'https://app.lecturio.com/#/lecture/c/6434/39634',
'only_matching': True,

View File

@@ -1,7 +1,7 @@
import uuid
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
int_or_none,
@@ -75,7 +75,7 @@ class LEGOIE(InfoExtractor):
'videoId': '%s_%s' % (uuid.UUID(video_id), locale),
}, headers=self.geo_verification_headers())
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 451:
if isinstance(e.cause, HTTPError) and e.cause.status == 451:
self.raise_geo_restricted(countries=countries)
raise

View File

@@ -1,7 +1,7 @@
import re
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..networking.exceptions import HTTPError
from ..utils import (
determine_ext,
float_or_none,
@@ -69,8 +69,8 @@ class LimelightBaseIE(InfoExtractor):
item_id, 'Downloading PlaylistService %s JSON' % method,
fatal=fatal, headers=headers)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
error = self._parse_json(e.cause.read().decode(), item_id)['detail']['contentAccessPermission']
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
error = self._parse_json(e.cause.response.read().decode(), item_id)['detail']['contentAccessPermission']
if error == 'CountryDisabled':
self.raise_geo_restricted()
raise ExtractorError(error, expected=True)

View File

@@ -2,11 +2,8 @@ import json
import random
from .common import InfoExtractor
from ..compat import (
compat_b64decode,
compat_HTTPError,
compat_str,
)
from ..compat import compat_b64decode, compat_str
from ..networking.exceptions import HTTPError
from ..utils import (
clean_html,
ExtractorError,
@@ -107,7 +104,7 @@ class LinuxAcademyIE(InfoExtractor):
'sso': 'true',
})
login_state_url = urlh.geturl()
login_state_url = urlh.url
try:
login_page = self._download_webpage(
@@ -119,8 +116,8 @@ class LinuxAcademyIE(InfoExtractor):
'Referer': login_state_url,
})
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
error = self._parse_json(e.cause.read(), None)
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
error = self._parse_json(e.cause.response.read(), None)
message = error.get('description') or error['code']
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, message), expected=True)
@@ -137,7 +134,7 @@ class LinuxAcademyIE(InfoExtractor):
})
access_token = self._search_regex(
r'access_token=([^=&]+)', urlh.geturl(),
r'access_token=([^=&]+)', urlh.url,
'access token', default=None)
if not access_token:
access_token = self._parse_json(

View File

@@ -13,7 +13,7 @@ from ..utils import (
class LiTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?litv\.tv/(?:vod|promo)/[^/]+/(?:content\.do)?\?.*?\b(?:content_)?id=(?P<id>[^&]+)'
_URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?id=%s'
_URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?content_id=%s'
_TESTS = [{
'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
@@ -21,16 +21,18 @@ class LiTVIE(InfoExtractor):
'id': 'VOD00041606',
'title': '花千骨',
},
'playlist_count': 50,
'playlist_count': 51, # 50 episodes + 1 trailer
}, {
'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
'md5': '969e343d9244778cb29acec608e53640',
'md5': 'b90ff1e9f1d8f5cfcd0a44c3e2b34c7a',
'info_dict': {
'id': 'VOD00041610',
'ext': 'mp4',
'title': '花千骨第1集',
'thumbnail': r're:https?://.*\.jpg$',
'description': 'md5:c7017aa144c87467c4fb2909c4b05d6f',
'description': '《花千骨》陸劇線上看。十六年前,平靜的村莊內,一名女嬰隨異相出生,途徑此地的蜀山掌門清虛道長算出此女命運非同一般,她體內散發的異香易招惹妖魔。一念慈悲下,他在村莊周邊設下結界阻擋妖魔入侵,讓其年滿十六後去蜀山,並賜名花千骨。',
'categories': ['奇幻', '愛情', '中國', '仙俠'],
'episode': 'Episode 1',
'episode_number': 1,
},
'params': {
@@ -46,20 +48,17 @@ class LiTVIE(InfoExtractor):
'title': '芈月傳第1集 霸星芈月降世楚國',
'description': '楚威王二年,太史令唐昧夜觀星象,發現霸星即將現世。王后得知霸星的預言後,想盡辦法不讓孩子順利出生,幸得莒姬相護化解危機。沒想到眾人期待下出生的霸星卻是位公主,楚威王對此失望至極。楚王后命人將女嬰丟棄河中,居然奇蹟似的被少司命像攔下,楚威王認為此女非同凡響,為她取名芈月。',
},
'skip': 'Georestricted to Taiwan',
'skip': 'No longer exists',
}]
def _extract_playlist(self, season_list, video_id, program_info, prompt=True):
episode_title = program_info['title']
content_id = season_list['contentId']
def _extract_playlist(self, playlist_data, content_type):
all_episodes = [
self.url_result(smuggle_url(
self._URL_TEMPLATE % (program_info['contentType'], episode['contentId']),
self._URL_TEMPLATE % (content_type, episode['contentId']),
{'force_noplaylist': True})) # To prevent infinite recursion
for episode in season_list['episode']]
for episode in traverse_obj(playlist_data, ('seasons', ..., 'episode', lambda _, v: v['contentId']))]
return self.playlist_result(all_episodes, content_id, episode_title)
return self.playlist_result(all_episodes, playlist_data['contentId'], playlist_data.get('title'))
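
The rewritten `_extract_playlist` leans on traverse_obj's branching: `...` fans out over every season, and the lambda keeps only episode dicts that actually carry a `contentId` (the filter swallows the KeyError for those that don't). A self-contained illustration with invented sample data:

    from yt_dlp.utils import traverse_obj

    playlist_data = {  # invented, shaped roughly like a getSeriesTree response
        'contentId': 'VOD00041606',
        'seasons': [
            {'episode': [{'contentId': 'VOD1'}, {'title': 'no id here'}]},
            {'episode': [{'contentId': 'VOD2'}]},
        ],
    }
    episodes = traverse_obj(
        playlist_data, ('seasons', ..., 'episode', lambda _, v: v['contentId']))
    assert [e['contentId'] for e in episodes] == ['VOD1', 'VOD2']
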
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
@@ -68,24 +67,31 @@ class LiTVIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
if self._search_regex(
r'(?i)<meta\s[^>]*http-equiv="refresh"\s[^>]*content="[0-9]+;\s*url=https://www\.litv\.tv/"',
webpage, 'meta refresh redirect', default=False, group=0):
raise ExtractorError('No such content found', expected=True)
program_info = self._parse_json(self._search_regex(
r'var\s+programInfo\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'),
video_id)
season_list = list(program_info.get('seasonList', {}).values())
playlist_id = traverse_obj(season_list, 0, 'contentId')
if self._yes_playlist(playlist_id, video_id, smuggled_data):
return self._extract_playlist(season_list[0], video_id, program_info)
# In browsers `getMainUrl` request is always issued. Usually this
# In browsers, the `getProgramInfo` request is always issued. Usually this
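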
# endpoint gives the same result as the data embedded in the webpage.
# If georestricted, there are no embedded data, so an extra request is
# necessary to get the error code
# If, for some reason, there are no embedded data, we do an extra request.
if 'assetId' not in program_info:
program_info = self._download_json(
'https://www.litv.tv/vod/ajax/getProgramInfo', video_id,
query={'contentId': video_id},
headers={'Accept': 'application/json'})
series_id = program_info['seriesId']
if self._yes_playlist(series_id, video_id, smuggled_data):
playlist_data = self._download_json(
'https://www.litv.tv/vod/ajax/getSeriesTree', video_id,
query={'seriesId': series_id}, headers={'Accept': 'application/json'})
return self._extract_playlist(playlist_data, program_info['contentType'])
video_data = self._parse_json(self._search_regex(
r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
webpage, 'video data', default='{}'), video_id)
@@ -96,7 +102,7 @@ class LiTVIE(InfoExtractor):
'contentType': program_info['contentType'],
}
video_data = self._download_json(
'https://www.litv.tv/vod/getMainUrl', video_id,
'https://www.litv.tv/vod/ajax/getMainUrlNoAuth', video_id,
data=json.dumps(payload).encode('utf-8'),
headers={'Content-Type': 'application/json'})

View File

@@ -80,7 +80,8 @@ class LivestreamIE(InfoExtractor):
}]
_API_URL_TEMPLATE = 'http://livestream.com/api/accounts/%s/events/%s'
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
def _parse_smil_formats_and_subtitles(
self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
base_ele = find_xpath_attr(
smil, self._xpath_ns('.//meta', namespace), 'name', 'httpBase')
base = base_ele.get('content') if base_ele is not None else 'http://livestreamvod-f.akamaihd.net/'
@@ -104,7 +105,7 @@ class LivestreamIE(InfoExtractor):
'tbr': tbr,
'preference': -1000, # Strictly inferior to all other formats?
})
return formats
return formats, {}
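
The rename from `_parse_smil_formats` to `_parse_smil_formats_and_subtitles` reflects the base class now returning a `(formats, subtitles)` pair, hence the `return formats, {}` above for an extractor with no SMIL subtitles. A stripped-down override that keeps both values (the extractor class is hypothetical) would look like:

    from yt_dlp.extractor.common import InfoExtractor

    class MySMILIE(InfoExtractor):  # hypothetical
        def _parse_smil_formats_and_subtitles(
                self, smil, smil_url, video_id, namespace=None,
                f4m_params=None, transform_rtmp_url=None):
            formats, subtitles = super()._parse_smil_formats_and_subtitles(
                smil, smil_url, video_id, namespace, f4m_params, transform_rtmp_url)
            # adjust formats here; pass subtitles through unchanged
            return formats, subtitles
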
def _extract_video_info(self, video_data):
video_id = compat_str(video_data['id'])

View File

@@ -0,0 +1,50 @@
from .common import InfoExtractor
from ..utils import parse_age_limit, parse_duration, traverse_obj
class MagellanTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?magellantv\.com/(?:watch|video)/(?P<id>[\w-]+)'
_TESTS = [{
'url': 'https://www.magellantv.com/watch/my-dads-on-death-row?type=v',
'info_dict': {
'id': 'my-dads-on-death-row',
'ext': 'mp4',
'title': 'My Dad\'s On Death Row',
'description': 'md5:33ba23b9f0651fc4537ed19b1d5b0d7a',
'duration': 3780.0,
'age_limit': 14,
'tags': ['Justice', 'Reality', 'United States', 'True Crime'],
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.magellantv.com/video/james-bulger-the-new-revelations',
'info_dict': {
'id': 'james-bulger-the-new-revelations',
'ext': 'mp4',
'title': 'James Bulger: The New Revelations',
'description': 'md5:7b97922038bad1d0fe8d0470d8a189f2',
'duration': 2640.0,
'age_limit': 0,
'tags': ['Investigation', 'True Crime', 'Justice', 'Europe'],
},
'params': {'skip_download': 'm3u8'},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['reactContext']['video']['detail']
formats, subtitles = self._extract_m3u8_formats_and_subtitles(data['jwpVideoUrl'], video_id)
return {
'id': video_id,
'formats': formats,
'subtitles': subtitles,
**traverse_obj(data, {
'title': ('title', {str}),
'description': ('metadata', 'description', {str}),
'duration': ('duration', {parse_duration}),
'age_limit': ('ratingCategory', {parse_age_limit}),
'tags': ('tags', ..., {str}),
}),
}
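
The new MagellanTV extractor uses traverse_obj's dict-template form: each value is a traversal path, a one-element set like `{str}` filters by type while `{parse_duration}` applies a converter, and keys whose path resolves to None are dropped from the result. A runnable illustration with made-up input:

    from yt_dlp.utils import parse_duration, traverse_obj

    data = {  # invented sample, loosely shaped like the Next.js detail blob
        'title': 'Some Film',
        'metadata': {'description': 12345},  # wrong type, filtered out by {str}
        'duration': '01:03:00',
        'tags': ['Justice', None, 'True Crime'],
    }
    info = traverse_obj(data, {
        'title': ('title', {str}),
        'description': ('metadata', 'description', {str}),
        'duration': ('duration', {parse_duration}),
        'tags': ('tags', ..., {str}),
    })
    assert info == {'title': 'Some Film', 'duration': 3780.0,
                    'tags': ['Justice', 'True Crime']}
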

View File

@@ -1,6 +1,7 @@
import itertools
import json
import re
import urllib.parse
from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote
@@ -140,17 +141,15 @@ class MailRuIE(InfoExtractor):
'http://api.video.mail.ru/videos/%s.json?new=1' % video_id,
video_id, 'Downloading video JSON')
headers = {}
video_key = self._get_cookies('https://my.mail.ru').get('video_key')
if video_key:
headers['Cookie'] = 'video_key=%s' % video_key.value
formats = []
for f in video_data['videos']:
video_url = f.get('url')
if not video_url:
continue
if video_key:
self._set_cookie(urllib.parse.urlparse(video_url).hostname, 'video_key', video_key.value)
format_id = f.get('key')
height = int_or_none(self._search_regex(
r'^(\d+)[pP]$', format_id, 'height', default=None)) if format_id else None
@@ -158,7 +157,6 @@ class MailRuIE(InfoExtractor):
'url': video_url,
'format_id': format_id,
'height': height,
'http_headers': headers,
})
meta_data = video_data['meta']
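
Instead of smuggling `video_key` through an http_headers Cookie on every format dict, the MailRu change registers the cookie for the host of each format URL and lets the networking layer attach it (and keep it across redirects). In sketch form, for use inside an extractor (the function name is invented):

    import urllib.parse

    def apply_video_key(ie, video_url, video_key_value):
        # Scope the cookie to the exact host that serves this format URL
        ie._set_cookie(
            urllib.parse.urlparse(video_url).hostname,
            'video_key', video_key_value)
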

View File

@@ -17,11 +17,12 @@ class MassengeschmackTVIE(InfoExtractor):
_TEST = {
'url': 'https://massengeschmack.tv/play/fktv202',
'md5': 'a9e054db9c2b5a08f0a0527cc201e8d3',
'md5': '9996f314994a49fefe5f39aa1b07ae21',
'info_dict': {
'id': 'fktv202',
'ext': 'mp4',
'title': 'Fernsehkritik-TV - Folge 202',
'title': 'Fernsehkritik-TV #202',
'thumbnail': 'https://cache.massengeschmack.tv/img/mag/fktv202.jpg'
},
}
@@ -29,9 +30,6 @@ class MassengeschmackTVIE(InfoExtractor):
episode = self._match_id(url)
webpage = self._download_webpage(url, episode)
title = clean_html(self._html_search_regex(
'<h3>([^<]+)</h3>', webpage, 'title'))
thumbnail = self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False)
sources = self._parse_json(self._search_regex(r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'), episode, js_to_json)
formats = []
@@ -67,7 +65,8 @@ class MassengeschmackTVIE(InfoExtractor):
return {
'id': episode,
'title': title,
'title': clean_html(self._html_search_regex(
r'<span[^>]+\bid=["\']clip-title["\'][^>]*>([^<]+)', webpage, 'title', fatal=False)),
'formats': formats,
'thumbnail': thumbnail,
'thumbnail': self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False),
}

View File

@@ -81,10 +81,24 @@ class MediaiteIE(InfoExtractor):
'upload_date': '20210930',
},
'params': {'skip_download': True}
}, {
'url': 'https://www.mediaite.com/politics/i-cant-read-it-fast-enough-while-defending-trump-larry-kudlow-overwhelmed-by-volume-of-ex-presidents-legal-troubles/',
'info_dict': {
'id': 'E6EhDX5z',
'ext': 'mp4',
'title': 'Fox Business Network - 4:00 PM - 5:00 PM - 1:39:42 pm - 1:42:20 pm',
'description': '',
'thumbnail': 'https://cdn.jwplayer.com/v2/media/E6EhDX5z/poster.jpg?width=720',
'duration': 157,
'timestamp': 1691015535,
'upload_date': '20230802',
},
'params': {'skip_download': True}
}]
def _real_extract(self, url):
webpage = self._download_webpage(url, None)
id = self._search_regex(r'data-video-id\s?=\s?\"([^\"]+)\"', webpage, 'id')
data_json = self._download_json(f'https://cdn.jwplayer.com/v2/media/{id}', id)
video_id = self._search_regex(
[r'"https://cdn\.jwplayer\.com/players/(\w+)', r'data-video-id\s*=\s*\"([^\"]+)\"'], webpage, 'id')
data_json = self._download_json(f'https://cdn.jwplayer.com/v2/media/{video_id}', video_id)
return self._parse_jwplayer_data(data_json)

View File

@@ -1,5 +1,8 @@
from ..utils import (
unified_strdate
ExtractorError,
traverse_obj,
unified_strdate,
url_or_none,
)
from .common import InfoExtractor
from ..compat import (
@@ -15,7 +18,7 @@ class MediaKlikkIE(InfoExtractor):
(?P<id>[^/#?_]+)'''
_TESTS = [{
# mediaklikk. date in html.
# (old) mediaklikk. date in html.
'url': 'https://mediaklikk.hu/video/hazajaro-delnyugat-bacska-a-duna-menten-palankatol-doroszloig/',
'info_dict': {
'id': '4754129',
@@ -23,9 +26,21 @@ class MediaKlikkIE(InfoExtractor):
'ext': 'mp4',
'upload_date': '20210901',
'thumbnail': 'http://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg'
},
'skip': 'Webpage redirects to 404 page',
}, {
# mediaklikk. date in html.
'url': 'https://mediaklikk.hu/video/hazajaro-fabova-hegyseg-kishont-koronaja/',
'info_dict': {
'id': '6696133',
'title': 'Hazajáró, Fabova-hegység - Kishont koronája',
'display_id': 'hazajaro-fabova-hegyseg-kishont-koronaja',
'ext': 'mp4',
'upload_date': '20230903',
'thumbnail': 'https://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg'
}
}, {
# m4sport
# (old) m4sport
'url': 'https://m4sport.hu/video/2021/08/30/gyemant-liga-parizs/',
'info_dict': {
'id': '4754999',
@@ -33,6 +48,18 @@ class MediaKlikkIE(InfoExtractor):
'ext': 'mp4',
'upload_date': '20210830',
'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/08/vlcsnap-2021-08-30-18h21m20s10-1024x576.jpg'
},
'skip': 'Webpage redirects to 404 page',
}, {
# m4sport
'url': 'https://m4sport.hu/sportkozvetitesek/video/2023/09/08/atletika-gyemant-liga-brusszel/',
'info_dict': {
'id': '6711136',
'title': 'Atlétika Gyémánt Liga, Brüsszel',
'display_id': 'atletika-gyemant-liga-brusszel',
'ext': 'mp4',
'upload_date': '20230908',
'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-08-22h43m18s691.jpg'
}
}, {
# m4sport with *video/ url and no date
@@ -40,20 +67,33 @@ class MediaKlikkIE(InfoExtractor):
'info_dict': {
'id': '4492099',
'title': 'Real Madrid - Chelsea 1-1',
'display_id': 'real-madrid-chelsea-1-1',
'ext': 'mp4',
'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png'
'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png'
}
}, {
# hirado
# (old) hirado
'url': 'https://hirado.hu/videok/felteteleket-szabott-a-fovaros/',
'info_dict': {
'id': '4760120',
'title': 'Feltételeket szabott a főváros',
'ext': 'mp4',
'thumbnail': 'http://hirado.hu/wp-content/uploads/sites/4/2021/09/vlcsnap-2021-09-01-20h20m37s165.jpg'
},
'skip': 'Webpage redirects to video list page',
}, {
# hirado
'url': 'https://hirado.hu/belfold/video/2023/09/11/marad-az-eves-elszamolas-a-napelemekre-beruhazo-csaladoknal',
'info_dict': {
'id': '6716068',
'title': 'Marad az éves elszámolás a napelemekre beruházó családoknál',
'display_id': 'marad-az-eves-elszamolas-a-napelemekre-beruhazo-csaladoknal',
'ext': 'mp4',
'upload_date': '20230911',
'thumbnail': 'https://hirado.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-11-09h16m09s882.jpg'
}
}, {
# petofilive
# (old) petofilive
'url': 'https://petofilive.hu/video/2021/06/07/tha-shudras-az-akusztikban/',
'info_dict': {
'id': '4571948',
@@ -61,6 +101,18 @@ class MediaKlikkIE(InfoExtractor):
'ext': 'mp4',
'upload_date': '20210607',
'thumbnail': 'http://petofilive.hu/wp-content/uploads/sites/4/2021/06/vlcsnap-2021-06-07-22h14m23s915-1024x576.jpg'
},
'skip': 'Webpage redirects to empty page',
}, {
# petofilive
'url': 'https://petofilive.hu/video/2023/09/09/futball-fesztival-a-margitszigeten/',
'info_dict': {
'id': '6713233',
'title': 'Futball Fesztivál a Margitszigeten',
'display_id': 'futball-fesztival-a-margitszigeten',
'ext': 'mp4',
'upload_date': '20230909',
'thumbnail': 'https://petofilive.hu/wp-content/uploads/sites/4/2023/09/Clipboard11-2.jpg'
}
}]
@@ -84,8 +136,12 @@ class MediaKlikkIE(InfoExtractor):
player_data['video'] = player_data.pop('token')
player_page = self._download_webpage('https://player.mediaklikk.hu/playernew/player.php', video_id, query=player_data)
playlist_url = self._proto_relative_url(compat_urllib_parse_unquote(
self._html_search_regex(r'\"file\":\s*\"(\\?/\\?/.*playlist\.m3u8)\"', player_page, 'playlist_url')).replace('\\/', '/'))
player_json = self._search_json(
r'\bpl\.setup\s*\(', player_page, 'player json', video_id, end_pattern=r'\);')
playlist_url = traverse_obj(
player_json, ('playlist', lambda _, v: v['type'] == 'hls', 'file', {url_or_none}), get_all=False)
if not playlist_url:
raise ExtractorError('Unable to extract playlist url')
formats = self._extract_wowza_formats(
playlist_url, video_id, skip_protocols=['f4m', 'smil', 'dash'])
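
Here the brittle `"file": "..."` regex gives way to `_search_json`, which matches balanced JSON after a start pattern (end_pattern only trims the initial match, so the trailing `);` is harmless), and traverse_obj then picks the first HLS playlist entry. A hedged sketch of the same two-step idiom inside a hypothetical extractor:

    from yt_dlp.extractor.common import InfoExtractor
    from yt_dlp.utils import ExtractorError, traverse_obj, url_or_none

    class MyPlayerIE(InfoExtractor):  # hypothetical
        def _extract_hls_url(self, player_page, video_id):
            player_json = self._search_json(
                r'\bpl\.setup\s*\(', player_page, 'player json', video_id,
                end_pattern=r'\);')
            # first playlist entry of type 'hls' whose 'file' is a valid URL
            playlist_url = traverse_obj(player_json, (
                'playlist', lambda _, v: v['type'] == 'hls', 'file',
                {url_or_none}), get_all=False)
            if not playlist_url:
                raise ExtractorError('Unable to extract playlist url')
            return playlist_url
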

View File

@@ -127,7 +127,8 @@ class MediasetIE(ThePlatformBaseIE):
},
'params': {
'skip_download': True,
}
},
'skip': 'Dead link',
}, {
# WittyTV embed
'url': 'https://www.wittytv.it/mauriziocostanzoshow/ultima-puntata-venerdi-25-novembre/',
@@ -154,10 +155,12 @@ class MediasetIE(ThePlatformBaseIE):
}
}]
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
def _parse_smil_formats_and_subtitles(
self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
for video in smil.findall(self._xpath_ns('.//video', namespace)):
video.attrib['src'] = re.sub(r'(https?://vod05)t(-mediaset-it\.akamaized\.net/.+?.mpd)\?.+', r'\1\2', video.attrib['src'])
return super(MediasetIE, self)._parse_smil_formats(smil, smil_url, video_id, namespace, f4m_params, transform_rtmp_url)
return super(MediasetIE, self)._parse_smil_formats_and_subtitles(
smil, smil_url, video_id, namespace, f4m_params, transform_rtmp_url)
def _check_drm_formats(self, tp_formats, video_id):
has_nondrm, drm_manifest = False, ''

View File

@@ -171,7 +171,7 @@ class MediasiteIE(InfoExtractor):
query = mobj.group('query')
webpage, urlh = self._download_webpage_handle(url, resource_id) # XXX: add UrlReferrer?
redirect_url = urlh.geturl()
redirect_url = urlh.url
# XXX: might have also extracted UrlReferrer and QueryString from the html
service_path = compat_urlparse.urljoin(redirect_url, self._html_search_regex(

View File

@@ -14,7 +14,7 @@ class MediaStreamBaseIE(InfoExtractor):
_BASE_URL_RE = r'https?://mdstrm\.com/(?:embed|live-stream)'
def _extract_mediastream_urls(self, webpage):
yield from traverse_obj(list(self._yield_json_ld(webpage, None)), (
yield from traverse_obj(list(self._yield_json_ld(webpage, None, fatal=False)), (
lambda _, v: v['@type'] == 'VideoObject', ('embedUrl', 'contentUrl'),
{lambda x: x if re.match(rf'{self._BASE_URL_RE}/\w+', x) else None}))
@@ -106,8 +106,12 @@ class MediaStreamIE(MediaStreamBaseIE):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
if 'Debido a tu ubicación no puedes ver el contenido' in webpage:
self.raise_geo_restricted()
for message in [
'Debido a tu ubicación no puedes ver el contenido',
'You are not allowed to watch this video: Geo Fencing Restriction'
]:
if message in webpage:
self.raise_geo_restricted()
player_config = self._search_json(r'window\.MDSTRM\.OPTIONS\s*=', webpage, 'metadata', video_id)

View File

@@ -1,14 +1,14 @@
import re
from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import (
ExtractorError,
clean_html,
determine_ext,
ExtractorError,
extract_attributes,
get_element_by_class,
get_element_html_by_id,
HEADRequest,
parse_qs,
unescapeHTML,
unified_timestamp,
@@ -160,5 +160,5 @@ class MegaTVComEmbedIE(MegaTVComBaseIE):
canonical_url = self._request_webpage(
HEADRequest(canonical_url), video_id,
note='Resolve canonical URL',
errnote='Could not resolve canonical URL').geturl()
errnote='Could not resolve canonical URL').url
return self.url_result(canonical_url, MegaTVComIE.ie_key(), video_id)
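
Several hunks above (LinuxAcademy, Mediasite, and this one) make the same mechanical change: response handles returned by `_request_webpage`/`_download_webpage_handle` now expose the final URL as a `.url` attribute instead of a `.geturl()` method. A minimal sketch, assuming use inside an extractor (the helper is invented):

    from yt_dlp.networking import HEADRequest

    def resolve_final_url(ie, start_url, video_id):
        # Follow redirects; `.url` replaces the old urlh.geturl() accessor
        urlh = ie._request_webpage(
            HEADRequest(start_url), video_id, note='Resolving final URL')
        return urlh.url
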

Some files were not shown because too many files have changed in this diff