mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-12-21 23:48:57 +00:00
Merge remote-tracking branch 'origin' into yt-live-from-start-range
This commit is contained in:
@@ -15,7 +15,6 @@ from .youtube import ( # Youtube is moved to the top to improve performance
|
||||
YoutubeSearchURLIE,
|
||||
YoutubeMusicSearchURLIE,
|
||||
YoutubeSubscriptionsIE,
|
||||
YoutubeStoriesIE,
|
||||
YoutubeTruncatedIDIE,
|
||||
YoutubeTruncatedURLIE,
|
||||
YoutubeYtBeIE,
|
||||
@@ -123,7 +122,6 @@ from .applepodcasts import ApplePodcastsIE
|
||||
from .archiveorg import (
|
||||
ArchiveOrgIE,
|
||||
YoutubeWebArchiveIE,
|
||||
VLiveWebArchiveIE,
|
||||
)
|
||||
from .arcpublishing import ArcPublishingIE
|
||||
from .arkena import ArkenaIE
|
||||
@@ -139,10 +137,6 @@ from .arte import (
|
||||
ArteTVCategoryIE,
|
||||
)
|
||||
from .arnes import ArnesIE
|
||||
from .asiancrush import (
|
||||
AsianCrushIE,
|
||||
AsianCrushPlaylistIE,
|
||||
)
|
||||
from .atresplayer import AtresPlayerIE
|
||||
from .atscaleconf import AtScaleConfEventIE
|
||||
from .atttechchannel import ATTTechChannelIE
|
||||
@@ -166,6 +160,7 @@ from .awaan import (
|
||||
AWAANLiveIE,
|
||||
AWAANSeasonIE,
|
||||
)
|
||||
from .axs import AxsIE
|
||||
from .azmedien import AZMedienIE
|
||||
from .baidu import BaiduVideoIE
|
||||
from .banbye import (
|
||||
@@ -215,6 +210,7 @@ from .bild import BildIE
|
||||
from .bilibili import (
|
||||
BiliBiliIE,
|
||||
BiliBiliBangumiIE,
|
||||
BiliBiliBangumiSeasonIE,
|
||||
BiliBiliBangumiMediaIE,
|
||||
BiliBiliSearchIE,
|
||||
BilibiliCategoryIE,
|
||||
@@ -223,7 +219,11 @@ from .bilibili import (
|
||||
BiliBiliPlayerIE,
|
||||
BilibiliSpaceVideoIE,
|
||||
BilibiliSpaceAudioIE,
|
||||
BilibiliSpacePlaylistIE,
|
||||
BilibiliCollectionListIE,
|
||||
BilibiliSeriesListIE,
|
||||
BilibiliFavoritesListIE,
|
||||
BilibiliWatchlaterIE,
|
||||
BilibiliPlaylistIE,
|
||||
BiliIntlIE,
|
||||
BiliIntlSeriesIE,
|
||||
BiliLiveIE,
|
||||
@@ -271,6 +271,10 @@ from .brightcove import (
|
||||
BrightcoveLegacyIE,
|
||||
BrightcoveNewIE,
|
||||
)
|
||||
from .brilliantpala import (
|
||||
BrilliantpalaElearnIE,
|
||||
BrilliantpalaClassesIE,
|
||||
)
|
||||
from .businessinsider import BusinessInsiderIE
|
||||
from .bundesliga import BundesligaIE
|
||||
from .buzzfeed import BuzzFeedIE
|
||||
@@ -292,9 +296,11 @@ from .cammodels import CamModelsIE
|
||||
from .camsoda import CamsodaIE
|
||||
from .camtasia import CamtasiaEmbedIE
|
||||
from .camwithher import CamWithHerIE
|
||||
from .canal1 import Canal1IE
|
||||
from .canalalpha import CanalAlphaIE
|
||||
from .canalplus import CanalplusIE
|
||||
from .canalc2 import Canalc2IE
|
||||
from .caracoltv import CaracolTvPlayIE
|
||||
from .carambatv import (
|
||||
CarambaTVIE,
|
||||
CarambaTVPageIE,
|
||||
@@ -303,6 +309,7 @@ from .cartoonnetwork import CartoonNetworkIE
|
||||
from .cbc import (
|
||||
CBCIE,
|
||||
CBCPlayerIE,
|
||||
CBCPlayerPlaylistIE,
|
||||
CBCGemIE,
|
||||
CBCGemPlaylistIE,
|
||||
CBCGemLiveIE,
|
||||
@@ -351,6 +358,10 @@ from .chirbit import (
|
||||
from .cinchcast import CinchcastIE
|
||||
from .cinemax import CinemaxIE
|
||||
from .cinetecamilano import CinetecaMilanoIE
|
||||
from .cineverse import (
|
||||
CineverseIE,
|
||||
CineverseDetailsIE,
|
||||
)
|
||||
from .ciscolive import (
|
||||
CiscoLiveSessionIE,
|
||||
CiscoLiveSearchIE,
|
||||
@@ -560,8 +571,10 @@ from .epicon import (
|
||||
EpiconIE,
|
||||
EpiconSeriesIE,
|
||||
)
|
||||
from .eplus import EplusIbIE
|
||||
from .epoch import EpochIE
|
||||
from .eporner import EpornerIE
|
||||
from .erocast import ErocastIE
|
||||
from .eroprofile import (
|
||||
EroProfileIE,
|
||||
EroProfileAlbumIE,
|
||||
@@ -939,6 +952,7 @@ from .lastfm import (
|
||||
from .lbry import (
|
||||
LBRYIE,
|
||||
LBRYChannelIE,
|
||||
LBRYPlaylistIE,
|
||||
)
|
||||
from .lci import LCIIE
|
||||
from .lcp import (
|
||||
@@ -1012,6 +1026,7 @@ from .lynda import (
|
||||
LyndaCourseIE
|
||||
)
|
||||
from .m6 import M6IE
|
||||
from .magellantv import MagellanTVIE
|
||||
from .magentamusik360 import MagentaMusik360IE
|
||||
from .mailru import (
|
||||
MailRuIE,
|
||||
@@ -1117,6 +1132,7 @@ from .mofosex import (
|
||||
MofosexEmbedIE,
|
||||
)
|
||||
from .mojvideo import MojvideoIE
|
||||
from .monstercat import MonstercatIE
|
||||
from .morningstar import MorningstarIE
|
||||
from .motherless import (
|
||||
MotherlessIE,
|
||||
@@ -1141,6 +1157,7 @@ from .mtv import (
|
||||
)
|
||||
from .muenchentv import MuenchenTVIE
|
||||
from .murrtube import MurrtubeIE, MurrtubeUserIE
|
||||
from .museai import MuseAIIE
|
||||
from .musescore import MuseScoreIE
|
||||
from .musicdex import (
|
||||
MusicdexSongIE,
|
||||
@@ -1288,6 +1305,11 @@ from .ninecninemedia import (
|
||||
NineCNineMediaIE,
|
||||
CPTwentyFourIE,
|
||||
)
|
||||
from .niconicochannelplus import (
|
||||
NiconicoChannelPlusIE,
|
||||
NiconicoChannelPlusChannelVideosIE,
|
||||
NiconicoChannelPlusChannelLivesIE,
|
||||
)
|
||||
from .ninegag import NineGagIE
|
||||
from .ninenow import NineNowIE
|
||||
from .nintendo import NintendoIE
|
||||
@@ -1418,7 +1440,7 @@ from .patreon import (
|
||||
PatreonIE,
|
||||
PatreonCampaignIE
|
||||
)
|
||||
from .pbs import PBSIE
|
||||
from .pbs import PBSIE, PBSKidsIE
|
||||
from .pearvideo import PearVideoIE
|
||||
from .peekvids import PeekVidsIE, PlayVidsIE
|
||||
from .peertube import (
|
||||
@@ -1441,6 +1463,7 @@ from .philharmoniedeparis import PhilharmonieDeParisIE
|
||||
from .phoenix import PhoenixIE
|
||||
from .photobucket import PhotobucketIE
|
||||
from .piapro import PiaproIE
|
||||
from .piaulizaportal import PIAULIZAPortalIE
|
||||
from .picarto import (
|
||||
PicartoIE,
|
||||
PicartoVodIE,
|
||||
@@ -1498,6 +1521,7 @@ from .polskieradio import (
|
||||
from .popcorntimes import PopcorntimesIE
|
||||
from .popcorntv import PopcornTVIE
|
||||
from .porn91 import Porn91IE
|
||||
from .pornbox import PornboxIE
|
||||
from .porncom import PornComIE
|
||||
from .pornflip import PornFlipIE
|
||||
from .pornhd import PornHdIE
|
||||
@@ -1516,7 +1540,7 @@ from .puhutv import (
|
||||
PuhuTVIE,
|
||||
PuhuTVSerieIE,
|
||||
)
|
||||
from .pr0gramm import Pr0grammStaticIE, Pr0grammIE
|
||||
from .pr0gramm import Pr0grammIE
|
||||
from .prankcast import PrankCastIE
|
||||
from .premiershiprugby import PremiershipRugbyIE
|
||||
from .presstv import PressTVIE
|
||||
@@ -1531,6 +1555,7 @@ from .prx import (
|
||||
)
|
||||
from .puls4 import Puls4IE
|
||||
from .pyvideo import PyvideoIE
|
||||
from .qdance import QDanceIE
|
||||
from .qingting import QingTingIE
|
||||
from .qqmusic import (
|
||||
QQMusicIE,
|
||||
@@ -1551,7 +1576,14 @@ from .radiocanada import (
|
||||
from .radiode import RadioDeIE
|
||||
from .radiojavan import RadioJavanIE
|
||||
from .radiobremen import RadioBremenIE
|
||||
from .radiofrance import FranceCultureIE, RadioFranceIE
|
||||
from .radiofrance import (
|
||||
FranceCultureIE,
|
||||
RadioFranceIE,
|
||||
RadioFranceLiveIE,
|
||||
RadioFrancePodcastIE,
|
||||
RadioFranceProfileIE,
|
||||
RadioFranceProgramScheduleIE,
|
||||
)
|
||||
from .radiozet import RadioZetPodcastIE
|
||||
from .radiokapital import (
|
||||
RadioKapitalIE,
|
||||
@@ -1582,6 +1614,7 @@ from .rbmaradio import RBMARadioIE
|
||||
from .rbgtum import (
|
||||
RbgTumIE,
|
||||
RbgTumCourseIE,
|
||||
RbgTumNewCourseIE,
|
||||
)
|
||||
from .rcs import (
|
||||
RCSIE,
|
||||
@@ -1695,8 +1728,8 @@ from .megatvcom import (
|
||||
MegaTVComIE,
|
||||
MegaTVComEmbedIE,
|
||||
)
|
||||
from .ant1newsgr import (
|
||||
Ant1NewsGrWatchIE,
|
||||
from .antenna import (
|
||||
AntennaGrWatchIE,
|
||||
Ant1NewsGrArticleIE,
|
||||
Ant1NewsGrEmbedIE,
|
||||
)
|
||||
@@ -1706,6 +1739,10 @@ from .ruv import (
|
||||
RuvIE,
|
||||
RuvSpilaIE
|
||||
)
|
||||
from .s4c import (
|
||||
S4CIE,
|
||||
S4CSeriesIE
|
||||
)
|
||||
from .safari import (
|
||||
SafariIE,
|
||||
SafariApiIE,
|
||||
@@ -1786,7 +1823,10 @@ from .slideslive import SlidesLiveIE
|
||||
from .slutload import SlutloadIE
|
||||
from .smotrim import SmotrimIE
|
||||
from .snotr import SnotrIE
|
||||
from .sohu import SohuIE
|
||||
from .sohu import (
|
||||
SohuIE,
|
||||
SohuVIE,
|
||||
)
|
||||
from .sonyliv import (
|
||||
SonyLIVIE,
|
||||
SonyLIVSeriesIE,
|
||||
@@ -1854,6 +1894,10 @@ from .srgssr import (
|
||||
SRGSSRPlayIE,
|
||||
)
|
||||
from .srmediathek import SRMediathekIE
|
||||
from .stacommu import (
|
||||
StacommuLiveIE,
|
||||
StacommuVODIE,
|
||||
)
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
from .startv import StarTVIE
|
||||
from .steam import (
|
||||
@@ -1866,7 +1910,6 @@ from .storyfire import (
|
||||
StoryFireSeriesIE,
|
||||
)
|
||||
from .streamable import StreamableIE
|
||||
from .streamanity import StreamanityIE
|
||||
from .streamcloud import StreamcloudIE
|
||||
from .streamcz import StreamCZIE
|
||||
from .streamff import StreamFFIE
|
||||
@@ -1894,6 +1937,11 @@ from .sztvhu import SztvHuIE
|
||||
from .tagesschau import TagesschauIE
|
||||
from .tass import TassIE
|
||||
from .tbs import TBSIE
|
||||
from .tbsjp import (
|
||||
TBSJPEpisodeIE,
|
||||
TBSJPProgramIE,
|
||||
TBSJPPlaylistIE,
|
||||
)
|
||||
from .tdslifeway import TDSLifewayIE
|
||||
from .teachable import (
|
||||
TeachableIE,
|
||||
@@ -1956,10 +2004,6 @@ from .theplatform import (
|
||||
)
|
||||
from .thestar import TheStarIE
|
||||
from .thesun import TheSunIE
|
||||
from .theta import (
|
||||
ThetaVideoIE,
|
||||
ThetaStreamIE,
|
||||
)
|
||||
from .theweatherchannel import TheWeatherChannelIE
|
||||
from .thisamericanlife import ThisAmericanLifeIE
|
||||
from .thisav import ThisAVIE
|
||||
@@ -2267,6 +2311,8 @@ from .vk import (
|
||||
VKIE,
|
||||
VKUserVideosIE,
|
||||
VKWallPostIE,
|
||||
VKPlayIE,
|
||||
VKPlayLiveIE,
|
||||
)
|
||||
from .vocaroo import VocarooIE
|
||||
from .vodlocker import VodlockerIE
|
||||
@@ -2339,7 +2385,8 @@ from .webofstories import (
|
||||
)
|
||||
from .weibo import (
|
||||
WeiboIE,
|
||||
WeiboMobileIE
|
||||
WeiboVideoIE,
|
||||
WeiboUserIE,
|
||||
)
|
||||
from .weiqitv import WeiqiTVIE
|
||||
from .weverse import (
|
||||
@@ -2355,6 +2402,7 @@ from .weyyak import WeyyakIE
|
||||
from .whyp import WhypIE
|
||||
from .wikimedia import WikimediaIE
|
||||
from .willow import WillowIE
|
||||
from .wimbledon import WimbledonIE
|
||||
from .wimtv import WimTVIE
|
||||
from .whowatch import WhoWatchIE
|
||||
from .wistia import (
|
||||
|
||||
@@ -12,6 +12,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
@@ -85,6 +86,15 @@ class ABCIE(InfoExtractor):
|
||||
'uploader': 'Behind the News',
|
||||
'uploader_id': 'behindthenews',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.abc.net.au/news/2023-06-25/wagner-boss-orders-troops-back-to-bases-to-avoid-bloodshed/102520540',
|
||||
'info_dict': {
|
||||
'id': '102520540',
|
||||
'title': 'Wagner Group retreating from Russia, leader Prigozhin to move to Belarus',
|
||||
'ext': 'mp4',
|
||||
'description': 'Wagner troops leave Rostov-on-Don and\xa0Yevgeny Prigozhin will move to Belarus under a deal brokered by Belarusian President Alexander Lukashenko to end the mutiny.',
|
||||
'thumbnail': 'https://live-production.wcms.abc-cdn.net.au/0c170f5b57f0105c432f366c0e8e267b?impolicy=wcms_crop_resize&cropH=2813&cropW=5000&xPos=0&yPos=249&width=862&height=485',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -107,7 +117,7 @@ class ABCIE(InfoExtractor):
|
||||
video = True
|
||||
|
||||
if mobj is None:
|
||||
mobj = re.search(r'(?P<type>)"sources": (?P<json_data>\[[^\]]+\]),', webpage)
|
||||
mobj = re.search(r'(?P<type>)"(?:sources|files|renditions)":\s*(?P<json_data>\[[^\]]+\])', webpage)
|
||||
if mobj is None:
|
||||
mobj = re.search(
|
||||
r'inline(?P<type>Video|Audio|YouTube)Data\.push\((?P<json_data>[^)]+)\);',
|
||||
@@ -121,7 +131,8 @@ class ABCIE(InfoExtractor):
|
||||
urls_info = self._parse_json(
|
||||
mobj.group('json_data'), video_id, transform_source=js_to_json)
|
||||
youtube = mobj.group('type') == 'YouTube'
|
||||
video = mobj.group('type') == 'Video' or urls_info[0]['contentType'] == 'video/mp4'
|
||||
video = mobj.group('type') == 'Video' or traverse_obj(
|
||||
urls_info, (0, ('contentType', 'MIMEType')), get_all=False) == 'video/mp4'
|
||||
|
||||
if not isinstance(urls_info, list):
|
||||
urls_info = [urls_info]
|
||||
@@ -169,20 +180,103 @@ class ABCIViewIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://iview\.abc\.net\.au/(?:[^/]+/)*video/(?P<id>[^/?#]+)'
|
||||
_GEO_COUNTRIES = ['AU']
|
||||
|
||||
# ABC iview programs are normally available for 14 days only.
|
||||
_TESTS = [{
|
||||
'url': 'https://iview.abc.net.au/show/utopia/series/1/video/CO1211V001S00',
|
||||
'md5': '52a942bfd7a0b79a6bfe9b4ce6c9d0ed',
|
||||
'info_dict': {
|
||||
'id': 'CO1211V001S00',
|
||||
'ext': 'mp4',
|
||||
'title': 'Series 1 Ep 1 Wood For The Trees',
|
||||
'series': 'Utopia',
|
||||
'description': 'md5:0cfb2c183c1b952d1548fd65c8a95c00',
|
||||
'upload_date': '20230726',
|
||||
'uploader_id': 'abc1',
|
||||
'series_id': 'CO1211V',
|
||||
'episode_id': 'CO1211V001S00',
|
||||
'season_number': 1,
|
||||
'season': 'Season 1',
|
||||
'episode_number': 1,
|
||||
'episode': 'Wood For The Trees',
|
||||
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/co/CO1211V001S00_5ad8353f4df09_1280.jpg',
|
||||
'timestamp': 1690403700,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'note': 'No episode name',
|
||||
'url': 'https://iview.abc.net.au/show/gruen/series/11/video/LE1927H001S00',
|
||||
'md5': '67715ce3c78426b11ba167d875ac6abf',
|
||||
'info_dict': {
|
||||
'id': 'LE1927H001S00',
|
||||
'ext': 'mp4',
|
||||
'title': "Series 11 Ep 1",
|
||||
'series': "Gruen",
|
||||
'title': 'Series 11 Ep 1',
|
||||
'series': 'Gruen',
|
||||
'description': 'md5:52cc744ad35045baf6aded2ce7287f67',
|
||||
'upload_date': '20190925',
|
||||
'uploader_id': 'abc1',
|
||||
'series_id': 'LE1927H',
|
||||
'episode_id': 'LE1927H001S00',
|
||||
'season_number': 11,
|
||||
'season': 'Season 11',
|
||||
'episode_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/le/LE1927H001S00_5d954fbd79e25_1280.jpg',
|
||||
'timestamp': 1569445289,
|
||||
},
|
||||
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'note': 'No episode number',
|
||||
'url': 'https://iview.abc.net.au/show/four-corners/series/2022/video/NC2203H039S00',
|
||||
'md5': '77cb7d8434440e3b28fbebe331c2456a',
|
||||
'info_dict': {
|
||||
'id': 'NC2203H039S00',
|
||||
'ext': 'mp4',
|
||||
'title': 'Series 2022 Locking Up Kids',
|
||||
'series': 'Four Corners',
|
||||
'description': 'md5:54829ca108846d1a70e1fcce2853e720',
|
||||
'upload_date': '20221114',
|
||||
'uploader_id': 'abc1',
|
||||
'series_id': 'NC2203H',
|
||||
'episode_id': 'NC2203H039S00',
|
||||
'season_number': 2022,
|
||||
'season': 'Season 2022',
|
||||
'episode_number': None,
|
||||
'episode': 'Locking Up Kids',
|
||||
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/nc/NC2203H039S00_636d8a0944a22_1920.jpg',
|
||||
'timestamp': 1668460497,
|
||||
|
||||
},
|
||||
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'note': 'No episode name or number',
|
||||
'url': 'https://iview.abc.net.au/show/landline/series/2021/video/RF2004Q043S00',
|
||||
'md5': '2e17dec06b13cc81dc119d2565289396',
|
||||
'info_dict': {
|
||||
'id': 'RF2004Q043S00',
|
||||
'ext': 'mp4',
|
||||
'title': 'Series 2021',
|
||||
'series': 'Landline',
|
||||
'description': 'md5:c9f30d9c0c914a7fd23842f6240be014',
|
||||
'upload_date': '20211205',
|
||||
'uploader_id': 'abc1',
|
||||
'series_id': 'RF2004Q',
|
||||
'episode_id': 'RF2004Q043S00',
|
||||
'season_number': 2021,
|
||||
'season': 'Season 2021',
|
||||
'episode_number': None,
|
||||
'episode': None,
|
||||
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/rf/RF2004Q043S00_61a950639dbc0_1920.jpg',
|
||||
'timestamp': 1638710705,
|
||||
|
||||
},
|
||||
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
@@ -244,6 +338,8 @@ class ABCIViewIE(InfoExtractor):
|
||||
'episode_number': int_or_none(self._search_regex(
|
||||
r'\bEp\s+(\d+)\b', title, 'episode number', default=None)),
|
||||
'episode_id': house_number,
|
||||
'episode': self._search_regex(
|
||||
r'^(?:Series\s+\d+)?\s*(?:Ep\s+\d+)?\s*(.*)$', title, 'episode', default='') or None,
|
||||
'uploader_id': video_params.get('channel'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
|
||||
@@ -12,7 +12,7 @@ import urllib.parse
|
||||
import urllib.request
|
||||
import urllib.response
|
||||
import uuid
|
||||
|
||||
from ..utils.networking import clean_proxies
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_ecb_decrypt
|
||||
from ..utils import (
|
||||
@@ -22,80 +22,26 @@ from ..utils import (
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
OnDemandPagedList,
|
||||
request_to_url,
|
||||
time_seconds,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
# NOTE: network handler related code is temporary thing until network stack overhaul PRs are merged (#2861/#2862)
|
||||
|
||||
|
||||
def add_opener(ydl, handler):
|
||||
''' Add a handler for opening URLs, like _download_webpage '''
|
||||
def add_opener(ydl, handler): # FIXME: Create proper API in .networking
|
||||
"""Add a handler for opening URLs, like _download_webpage"""
|
||||
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
|
||||
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
|
||||
assert isinstance(ydl._opener, urllib.request.OpenerDirector)
|
||||
ydl._opener.add_handler(handler)
|
||||
|
||||
|
||||
def remove_opener(ydl, handler):
|
||||
'''
|
||||
Remove handler(s) for opening URLs
|
||||
@param handler Either handler object itself or handler type.
|
||||
Specifying handler type will remove all handler which isinstance returns True.
|
||||
'''
|
||||
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
|
||||
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
|
||||
opener = ydl._opener
|
||||
assert isinstance(ydl._opener, urllib.request.OpenerDirector)
|
||||
if isinstance(handler, (type, tuple)):
|
||||
find_cp = lambda x: isinstance(x, handler)
|
||||
else:
|
||||
find_cp = lambda x: x is handler
|
||||
|
||||
removed = []
|
||||
for meth in dir(handler):
|
||||
if meth in ["redirect_request", "do_open", "proxy_open"]:
|
||||
# oops, coincidental match
|
||||
continue
|
||||
|
||||
i = meth.find("_")
|
||||
protocol = meth[:i]
|
||||
condition = meth[i + 1:]
|
||||
|
||||
if condition.startswith("error"):
|
||||
j = condition.find("_") + i + 1
|
||||
kind = meth[j + 1:]
|
||||
try:
|
||||
kind = int(kind)
|
||||
except ValueError:
|
||||
pass
|
||||
lookup = opener.handle_error.get(protocol, {})
|
||||
opener.handle_error[protocol] = lookup
|
||||
elif condition == "open":
|
||||
kind = protocol
|
||||
lookup = opener.handle_open
|
||||
elif condition == "response":
|
||||
kind = protocol
|
||||
lookup = opener.process_response
|
||||
elif condition == "request":
|
||||
kind = protocol
|
||||
lookup = opener.process_request
|
||||
else:
|
||||
continue
|
||||
|
||||
handlers = lookup.setdefault(kind, [])
|
||||
if handlers:
|
||||
handlers[:] = [x for x in handlers if not find_cp(x)]
|
||||
|
||||
removed.append(x for x in handlers if find_cp(x))
|
||||
|
||||
if removed:
|
||||
for x in opener.handlers:
|
||||
if find_cp(x):
|
||||
x.add_parent(None)
|
||||
opener.handlers[:] = [x for x in opener.handlers if not find_cp(x)]
|
||||
rh = ydl._request_director.handlers['Urllib']
|
||||
if 'abematv-license' in rh._SUPPORTED_URL_SCHEMES:
|
||||
return
|
||||
headers = ydl.params['http_headers'].copy()
|
||||
proxies = ydl.proxies.copy()
|
||||
clean_proxies(proxies, headers)
|
||||
opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=proxies)
|
||||
assert isinstance(opener, urllib.request.OpenerDirector)
|
||||
opener.add_handler(handler)
|
||||
rh._SUPPORTED_URL_SCHEMES = (*rh._SUPPORTED_URL_SCHEMES, 'abematv-license')
|
||||
|
||||
|
||||
class AbemaLicenseHandler(urllib.request.BaseHandler):
|
||||
@@ -137,11 +83,11 @@ class AbemaLicenseHandler(urllib.request.BaseHandler):
|
||||
return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))
|
||||
|
||||
def abematv_license_open(self, url):
|
||||
url = request_to_url(url)
|
||||
url = url.get_full_url() if isinstance(url, urllib.request.Request) else url
|
||||
ticket = urllib.parse.urlparse(url).netloc
|
||||
response_data = self._get_videokey_from_ticket(ticket)
|
||||
return urllib.response.addinfourl(io.BytesIO(response_data), headers={
|
||||
'Content-Length': len(response_data),
|
||||
'Content-Length': str(len(response_data)),
|
||||
}, url=url, code=200)
|
||||
|
||||
|
||||
@@ -213,10 +159,7 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
})
|
||||
AbemaTVBaseIE._USERTOKEN = user_data['token']
|
||||
|
||||
# don't allow adding it 2 times or more, though it's guarded
|
||||
remove_opener(self._downloader, AbemaLicenseHandler)
|
||||
add_opener(self._downloader, AbemaLicenseHandler(self))
|
||||
|
||||
return self._USERTOKEN
|
||||
|
||||
def _get_media_token(self, invalidate=False, to_show=True):
|
||||
|
||||
@@ -6,10 +6,8 @@ import random
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_b64decode,
|
||||
)
|
||||
from ..compat import compat_b64decode
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ass_subtitles_timecode,
|
||||
bytes_to_intlist,
|
||||
@@ -142,9 +140,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
self._HEADERS = {'authorization': 'Bearer ' + access_token}
|
||||
except ExtractorError as e:
|
||||
message = None
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
|
||||
resp = self._parse_json(
|
||||
e.cause.read().decode(), None, fatal=False) or {}
|
||||
e.cause.response.read().decode(), None, fatal=False) or {}
|
||||
message = resp.get('message') or resp.get('code')
|
||||
self.report_warning(message or self._LOGIN_ERR_MESSAGE)
|
||||
|
||||
@@ -195,14 +193,14 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
})
|
||||
break
|
||||
except ExtractorError as e:
|
||||
if not isinstance(e.cause, compat_HTTPError):
|
||||
if not isinstance(e.cause, HTTPError):
|
||||
raise e
|
||||
|
||||
if e.cause.code == 401:
|
||||
if e.cause.status == 401:
|
||||
# This usually goes away with a different random pkcs1pad, so retry
|
||||
continue
|
||||
|
||||
error = self._parse_json(e.cause.read(), video_id)
|
||||
error = self._parse_json(e.cause.response.read(), video_id)
|
||||
message = error.get('message')
|
||||
if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
|
||||
self.raise_geo_restricted(msg=message)
|
||||
|
||||
@@ -2,11 +2,11 @@ import getpass
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import urllib.error
|
||||
import xml.etree.ElementTree as etree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
NO_DEFAULT,
|
||||
ExtractorError,
|
||||
@@ -1394,7 +1394,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
form_page, urlh = form_page_res
|
||||
post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url')
|
||||
if not re.match(r'https?://', post_url):
|
||||
post_url = compat_urlparse.urljoin(urlh.geturl(), post_url)
|
||||
post_url = compat_urlparse.urljoin(urlh.url, post_url)
|
||||
form_data = self._hidden_inputs(form_page)
|
||||
form_data.update(data)
|
||||
return self._download_webpage_handle(
|
||||
@@ -1473,7 +1473,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
elif 'automatically signed in with' in provider_redirect_page:
|
||||
# Seems like comcast is rolling up new way of automatically signing customers
|
||||
oauth_redirect_url = self._html_search_regex(
|
||||
r'continue:\s*"(https://oauth.xfinity.com/oauth/authorize\?.+)"', provider_redirect_page,
|
||||
r'continue:\s*"(https://oauth\.xfinity\.com/oauth/authorize\?.+)"', provider_redirect_page,
|
||||
'oauth redirect (signed)')
|
||||
# Just need to process the request. No useful data comes back
|
||||
self._download_webpage(oauth_redirect_url, video_id, 'Confirming auto login')
|
||||
@@ -1619,7 +1619,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
hidden_data['history'] = 1
|
||||
|
||||
provider_login_page_res = self._download_webpage_handle(
|
||||
urlh.geturl(), video_id, 'Sending first bookend',
|
||||
urlh.url, video_id, 'Sending first bookend',
|
||||
query=hidden_data)
|
||||
|
||||
provider_association_redirect, urlh = post_form(
|
||||
@@ -1629,7 +1629,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
})
|
||||
|
||||
provider_refresh_redirect_url = extract_redirect_url(
|
||||
provider_association_redirect, url=urlh.geturl())
|
||||
provider_association_redirect, url=urlh.url)
|
||||
|
||||
last_bookend_page, urlh = self._download_webpage_handle(
|
||||
provider_refresh_redirect_url, video_id,
|
||||
@@ -1638,7 +1638,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
hidden_data['history'] = 3
|
||||
|
||||
mvpd_confirm_page_res = self._download_webpage_handle(
|
||||
urlh.geturl(), video_id, 'Sending final bookend',
|
||||
urlh.url, video_id, 'Sending final bookend',
|
||||
query=hidden_data)
|
||||
|
||||
post_form(mvpd_confirm_page_res, 'Confirming Login')
|
||||
@@ -1652,7 +1652,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
hidden_data['history_val'] = 1
|
||||
|
||||
provider_login_redirect_page_res = self._download_webpage_handle(
|
||||
urlh.geturl(), video_id, 'Sending First Bookend',
|
||||
urlh.url, video_id, 'Sending First Bookend',
|
||||
query=hidden_data)
|
||||
|
||||
provider_login_redirect_page, urlh = provider_login_redirect_page_res
|
||||
@@ -1680,7 +1680,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
})
|
||||
|
||||
provider_refresh_redirect_url = extract_redirect_url(
|
||||
provider_association_redirect, url=urlh.geturl())
|
||||
provider_association_redirect, url=urlh.url)
|
||||
|
||||
last_bookend_page, urlh = self._download_webpage_handle(
|
||||
provider_refresh_redirect_url, video_id,
|
||||
@@ -1690,7 +1690,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
hidden_data['history_val'] = 3
|
||||
|
||||
mvpd_confirm_page_res = self._download_webpage_handle(
|
||||
urlh.geturl(), video_id, 'Sending Final Bookend',
|
||||
urlh.url, video_id, 'Sending Final Bookend',
|
||||
query=hidden_data)
|
||||
|
||||
post_form(mvpd_confirm_page_res, 'Confirming Login')
|
||||
@@ -1699,7 +1699,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
# based redirect that should be followed.
|
||||
provider_redirect_page, urlh = provider_redirect_page_res
|
||||
provider_refresh_redirect_url = extract_redirect_url(
|
||||
provider_redirect_page, url=urlh.geturl())
|
||||
provider_redirect_page, url=urlh.url)
|
||||
if provider_refresh_redirect_url:
|
||||
provider_redirect_page_res = self._download_webpage_handle(
|
||||
provider_refresh_redirect_url, video_id,
|
||||
@@ -1724,7 +1724,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
'requestor_id': requestor_id,
|
||||
}), headers=mvpd_headers)
|
||||
except ExtractorError as e:
|
||||
if not mso_id and isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 401:
|
||||
if not mso_id and isinstance(e.cause, HTTPError) and e.cause.status == 401:
|
||||
raise_mvpd_required()
|
||||
raise
|
||||
if '<pendingLogout' in session:
|
||||
|
||||
@@ -170,8 +170,10 @@ class AdultSwimIE(TurnerBaseIE):
|
||||
continue
|
||||
ext = determine_ext(asset_url, mimetype2ext(asset.get('mime_type')))
|
||||
if ext == 'm3u8':
|
||||
info['formats'].extend(self._extract_m3u8_formats(
|
||||
asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
info['formats'].extend(fmts)
|
||||
self._merge_subtitles(subs, target=info['subtitles'])
|
||||
elif ext == 'f4m':
|
||||
continue
|
||||
# info['formats'].extend(self._extract_f4m_formats(
|
||||
|
||||
@@ -338,6 +338,7 @@ class BiographyIE(AENetworksBaseIE):
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
'skip': '404 Not Found',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -22,8 +22,11 @@ class AmazonMiniTVBaseIE(InfoExtractor):
|
||||
|
||||
resp = self._download_json(
|
||||
f'https://www.amazon.in/minitv/api/web/{"graphql" if data else "prs"}',
|
||||
asin, note=note, headers={'Content-Type': 'application/json'},
|
||||
data=json.dumps(data).encode() if data else None,
|
||||
asin, note=note, headers={
|
||||
'Content-Type': 'application/json',
|
||||
'currentpageurl': '/',
|
||||
'currentplatform': 'dWeb'
|
||||
}, data=json.dumps(data).encode() if data else None,
|
||||
query=None if data else {
|
||||
'deviceType': 'A1WMMUXPCUJL4N',
|
||||
'contentId': asin,
|
||||
@@ -46,7 +49,7 @@ class AmazonMiniTVIE(AmazonMiniTVBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'May I Kiss You?',
|
||||
'language': 'Hindi',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
|
||||
'description': 'md5:a549bfc747973e04feb707833474e59d',
|
||||
'release_timestamp': 1644710400,
|
||||
'release_date': '20220213',
|
||||
@@ -68,7 +71,7 @@ class AmazonMiniTVIE(AmazonMiniTVBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'Jahaan',
|
||||
'language': 'Hindi',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||
'description': 'md5:05eb765a77bf703f322f120ec6867339',
|
||||
'release_timestamp': 1647475200,
|
||||
'release_date': '20220317',
|
||||
|
||||
@@ -26,6 +26,7 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
|
||||
'only_matching': True,
|
||||
|
||||
@@ -1,26 +1,30 @@
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
make_archive_id,
|
||||
scale_thumbnails_to_max_format_width,
|
||||
)
|
||||
|
||||
|
||||
class Ant1NewsGrBaseIE(InfoExtractor):
|
||||
class AntennaBaseIE(InfoExtractor):
|
||||
def _download_and_extract_api_data(self, video_id, netloc, cid=None):
|
||||
url = f'{self.http_scheme()}//{netloc}{self._API_PATH}'
|
||||
info = self._download_json(url, video_id, query={'cid': cid or video_id})
|
||||
try:
|
||||
source = info['url']
|
||||
except KeyError:
|
||||
raise ExtractorError('no source found for %s' % video_id)
|
||||
formats, subs = (self._extract_m3u8_formats_and_subtitles(source, video_id, 'mp4')
|
||||
if determine_ext(source) == 'm3u8' else ([{'url': source}], {}))
|
||||
info = self._download_json(f'{self.http_scheme()}//{netloc}{self._API_PATH}',
|
||||
video_id, query={'cid': cid or video_id})
|
||||
if not info.get('url'):
|
||||
raise ExtractorError(f'No source found for {video_id}')
|
||||
|
||||
ext = determine_ext(info['url'])
|
||||
if ext == 'm3u8':
|
||||
formats, subs = self._extract_m3u8_formats_and_subtitles(info['url'], video_id, 'mp4')
|
||||
else:
|
||||
formats, subs = [{'url': info['url'], 'format_id': ext}], {}
|
||||
|
||||
thumbnails = scale_thumbnails_to_max_format_width(
|
||||
formats, [{'url': info['thumb']}], r'(?<=/imgHandler/)\d+')
|
||||
formats, [{'url': info['thumb']}], r'(?<=/imgHandler/)\d+') if info.get('thumb') else []
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info.get('title'),
|
||||
@@ -30,21 +34,31 @@ class Ant1NewsGrBaseIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class Ant1NewsGrWatchIE(Ant1NewsGrBaseIE):
|
||||
IE_NAME = 'ant1newsgr:watch'
|
||||
IE_DESC = 'ant1news.gr videos'
|
||||
_VALID_URL = r'https?://(?P<netloc>(?:www\.)?ant1news\.gr)/watch/(?P<id>\d+)/'
|
||||
class AntennaGrWatchIE(AntennaBaseIE):
|
||||
IE_NAME = 'antenna:watch'
|
||||
IE_DESC = 'antenna.gr and ant1news.gr videos'
|
||||
_VALID_URL = r'https?://(?P<netloc>(?:www\.)?(?:antenna|ant1news)\.gr)/watch/(?P<id>\d+)/'
|
||||
_API_PATH = '/templates/data/player'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ant1news.gr/watch/1506168/ant1-news-09112021-stis-18-45',
|
||||
'md5': '95925e6b32106754235f2417e0d2dfab',
|
||||
'md5': 'c472d9dd7cd233c63aff2ea42201cda6',
|
||||
'info_dict': {
|
||||
'id': '1506168',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:0ad00fa66ecf8aa233d26ab0dba7514a',
|
||||
'description': 'md5:18665af715a6dcfeac1d6153a44f16b0',
|
||||
'thumbnail': 'https://ant1media.azureedge.net/imgHandler/640/26d46bf6-8158-4f02-b197-7096c714b2de.jpg',
|
||||
'thumbnail': r're:https://ant1media\.azureedge\.net/imgHandler/\d+/26d46bf6-8158-4f02-b197-7096c714b2de\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.antenna.gr/watch/1643812/oi-prodotes-epeisodio-01',
|
||||
'md5': '8f6f7dd3b1dba4d835ba990e25f31243',
|
||||
'info_dict': {
|
||||
'id': '1643812',
|
||||
'ext': 'mp4',
|
||||
'format_id': 'mp4',
|
||||
'title': 'ΟΙ ΠΡΟΔΟΤΕΣ – ΕΠΕΙΣΟΔΙΟ 01',
|
||||
'thumbnail': r're:https://ant1media\.azureedge\.net/imgHandler/\d+/b3d63096-e72d-43c4-87a0-00d4363d242f\.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
@@ -52,11 +66,12 @@ class Ant1NewsGrWatchIE(Ant1NewsGrBaseIE):
|
||||
video_id, netloc = self._match_valid_url(url).group('id', 'netloc')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
info = self._download_and_extract_api_data(video_id, netloc)
|
||||
info['description'] = self._og_search_description(webpage)
|
||||
info['description'] = self._og_search_description(webpage, default=None)
|
||||
info['_old_archive_ids'] = [make_archive_id('Ant1NewsGrWatch', video_id)],
|
||||
return info
|
||||
|
||||
|
||||
class Ant1NewsGrArticleIE(Ant1NewsGrBaseIE):
|
||||
class Ant1NewsGrArticleIE(AntennaBaseIE):
|
||||
IE_NAME = 'ant1newsgr:article'
|
||||
IE_DESC = 'ant1news.gr articles'
|
||||
_VALID_URL = r'https?://(?:www\.)?ant1news\.gr/[^/]+/article/(?P<id>\d+)/'
|
||||
@@ -96,7 +111,7 @@ class Ant1NewsGrArticleIE(Ant1NewsGrBaseIE):
|
||||
video_kwargs={'url_transparent': True, 'timestamp': info.get('timestamp')})
|
||||
|
||||
|
||||
class Ant1NewsGrEmbedIE(Ant1NewsGrBaseIE):
|
||||
class Ant1NewsGrEmbedIE(AntennaBaseIE):
|
||||
IE_NAME = 'ant1newsgr:embed'
|
||||
IE_DESC = 'ant1news.gr embedded videos'
|
||||
_BASE_PLAYER_URL_RE = r'(?:https?:)?//(?:[a-zA-Z0-9\-]+\.)?(?:antenna|ant1news)\.gr/templates/pages/player'
|
||||
@@ -121,7 +136,7 @@ class Ant1NewsGrEmbedIE(Ant1NewsGrBaseIE):
|
||||
canonical_url = self._request_webpage(
|
||||
HEADRequest(url), video_id,
|
||||
note='Resolve canonical player URL',
|
||||
errnote='Could not resolve canonical player URL').geturl()
|
||||
errnote='Could not resolve canonical player URL').url
|
||||
_, netloc, _, _, query, _ = urllib.parse.urlparse(canonical_url)
|
||||
cid = urllib.parse.parse_qs(query)['cid'][0]
|
||||
|
||||
@@ -1,16 +1,15 @@
|
||||
import json
|
||||
import re
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .naver import NaverBaseIE
|
||||
from .youtube import YoutubeBaseInfoExtractor, YoutubeIE
|
||||
from ..compat import compat_HTTPError, compat_urllib_parse_unquote
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..networking import HEADRequest
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
KNOWN_EXTENSIONS,
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
bug_reports_message,
|
||||
clean_html,
|
||||
dict_get,
|
||||
@@ -899,7 +898,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
video_id, note='Fetching archived video file url', expected_status=True)
|
||||
except ExtractorError as e:
|
||||
# HTTP Error 404 is expected if the video is not saved.
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 404:
|
||||
self.raise_no_formats(
|
||||
'The requested video is not archived, indexed, or there is an issue with web.archive.org (try again later)', expected=True)
|
||||
else:
|
||||
@@ -926,7 +925,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
info['thumbnails'] = self._extract_thumbnails(video_id)
|
||||
|
||||
if urlh:
|
||||
url = compat_urllib_parse_unquote(urlh.geturl())
|
||||
url = compat_urllib_parse_unquote(urlh.url)
|
||||
video_file_url_qs = parse_qs(url)
|
||||
# Attempt to recover any ext & format info from playback url & response headers
|
||||
format = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))}
|
||||
@@ -947,237 +946,3 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
if not info.get('title'):
|
||||
info['title'] = video_id
|
||||
return info
|
||||
|
||||
|
||||
class VLiveWebArchiveIE(InfoExtractor):
|
||||
IE_NAME = 'web.archive:vlive'
|
||||
IE_DESC = 'web.archive.org saved vlive videos'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:https?://)?web\.archive\.org/
|
||||
(?:web/)?(?:(?P<date>[0-9]{14})?[0-9A-Za-z_*]*/)? # /web and the version index is optional
|
||||
(?:https?(?::|%3[Aa])//)?(?:
|
||||
(?:(?:www|m)\.)?vlive\.tv(?::(?:80|443))?/(?:video|embed)/(?P<id>[0-9]+) # VLive URL
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://web.archive.org/web/20221221144331/http://www.vlive.tv/video/1326',
|
||||
'md5': 'cc7314812855ce56de70a06a27314983',
|
||||
'info_dict': {
|
||||
'id': '1326',
|
||||
'ext': 'mp4',
|
||||
'title': "Girl's Day's Broadcast",
|
||||
'creator': "Girl's Day",
|
||||
'view_count': int,
|
||||
'uploader_id': 'muploader_a',
|
||||
'uploader_url': None,
|
||||
'uploader': None,
|
||||
'upload_date': '20150817',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
|
||||
'timestamp': 1439816449,
|
||||
'like_count': int,
|
||||
'channel': 'Girl\'s Day',
|
||||
'channel_id': 'FDF27',
|
||||
'comment_count': int,
|
||||
'release_timestamp': 1439818140,
|
||||
'release_date': '20150817',
|
||||
'duration': 1014,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://web.archive.org/web/20221221182103/http://www.vlive.tv/video/16937',
|
||||
'info_dict': {
|
||||
'id': '16937',
|
||||
'ext': 'mp4',
|
||||
'title': '첸백시 걍방',
|
||||
'creator': 'EXO',
|
||||
'view_count': int,
|
||||
'subtitles': 'mincount:12',
|
||||
'uploader_id': 'muploader_j',
|
||||
'uploader_url': 'http://vlive.tv',
|
||||
'uploader': None,
|
||||
'upload_date': '20161112',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
|
||||
'timestamp': 1478923074,
|
||||
'like_count': int,
|
||||
'channel': 'EXO',
|
||||
'channel_id': 'F94BD',
|
||||
'comment_count': int,
|
||||
'release_timestamp': 1478924280,
|
||||
'release_date': '20161112',
|
||||
'duration': 906,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://web.archive.org/web/20221127190050/http://www.vlive.tv/video/101870',
|
||||
'info_dict': {
|
||||
'id': '101870',
|
||||
'ext': 'mp4',
|
||||
'title': '[ⓓ xV] “레벨이들 매력에 반해? 안 반해?” 움직이는 HD 포토 (레드벨벳:Red Velvet)',
|
||||
'creator': 'Dispatch',
|
||||
'view_count': int,
|
||||
'subtitles': 'mincount:6',
|
||||
'uploader_id': 'V__FRA08071',
|
||||
'uploader_url': 'http://vlive.tv',
|
||||
'uploader': None,
|
||||
'upload_date': '20181130',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
|
||||
'timestamp': 1543601327,
|
||||
'like_count': int,
|
||||
'channel': 'Dispatch',
|
||||
'channel_id': 'C796F3',
|
||||
'comment_count': int,
|
||||
'release_timestamp': 1543601040,
|
||||
'release_date': '20181130',
|
||||
'duration': 279,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
# The wayback machine has special timestamp and "mode" values:
|
||||
# timestamp:
|
||||
# 1 = the first capture
|
||||
# 2 = the last capture
|
||||
# mode:
|
||||
# id_ = Identity - perform no alterations of the original resource, return it as it was archived.
|
||||
_WAYBACK_BASE_URL = 'https://web.archive.org/web/2id_/'
|
||||
|
||||
def _download_archived_page(self, url, video_id, *, timestamp='2', **kwargs):
|
||||
for retry in self.RetryManager():
|
||||
try:
|
||||
return self._download_webpage(f'https://web.archive.org/web/{timestamp}id_/{url}', video_id, **kwargs)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 404:
|
||||
raise ExtractorError('Page was not archived', expected=True)
|
||||
retry.error = e
|
||||
continue
|
||||
|
||||
def _download_archived_json(self, url, video_id, **kwargs):
|
||||
page = self._download_archived_page(url, video_id, **kwargs)
|
||||
if not page:
|
||||
raise ExtractorError('Page was not archived', expected=True)
|
||||
else:
|
||||
return self._parse_json(page, video_id)
|
||||
|
||||
def _extract_formats_from_m3u8(self, m3u8_url, params, video_id):
|
||||
m3u8_doc = self._download_archived_page(m3u8_url, video_id, note='Downloading m3u8', query=params, fatal=False)
|
||||
if not m3u8_doc:
|
||||
return
|
||||
|
||||
# M3U8 document should be changed to archive domain
|
||||
m3u8_doc = m3u8_doc.splitlines()
|
||||
url_base = m3u8_url.rsplit('/', 1)[0]
|
||||
first_segment = None
|
||||
for i, line in enumerate(m3u8_doc):
|
||||
if not line.startswith('#'):
|
||||
m3u8_doc[i] = f'{self._WAYBACK_BASE_URL}{url_base}/{line}?{urllib.parse.urlencode(params)}'
|
||||
first_segment = first_segment or m3u8_doc[i]
|
||||
|
||||
# Segments may not have been archived. See https://web.archive.org/web/20221127190050/http://www.vlive.tv/video/101870
|
||||
urlh = self._request_webpage(HEADRequest(first_segment), video_id, errnote=False,
|
||||
fatal=False, note='Check first segment availablity')
|
||||
if urlh:
|
||||
formats, subtitles = self._parse_m3u8_formats_and_subtitles('\n'.join(m3u8_doc), ext='mp4', video_id=video_id)
|
||||
if subtitles:
|
||||
self._report_ignoring_subs('m3u8')
|
||||
return formats
|
||||
|
||||
# Closely follows the logic of the ArchiveTeam grab script
|
||||
# See: https://github.com/ArchiveTeam/vlive-grab/blob/master/vlive.lua
|
||||
def _real_extract(self, url):
|
||||
video_id, url_date = self._match_valid_url(url).group('id', 'date')
|
||||
|
||||
webpage = self._download_archived_page(f'https://www.vlive.tv/video/{video_id}', video_id, timestamp=url_date)
|
||||
|
||||
player_info = self._search_json(r'__PRELOADED_STATE__\s*=', webpage, 'player info', video_id)
|
||||
user_country = traverse_obj(player_info, ('common', 'userCountry'))
|
||||
|
||||
main_script_url = self._search_regex(r'<script\s+src="([^"]+/js/main\.[^"]+\.js)"', webpage, 'main script url')
|
||||
main_script = self._download_archived_page(main_script_url, video_id, note='Downloading main script')
|
||||
app_id = self._search_regex(r'appId\s*=\s*"([^"]+)"', main_script, 'app id')
|
||||
|
||||
inkey = self._download_archived_json(
|
||||
f'https://www.vlive.tv/globalv-web/vam-web/video/v1.0/vod/{video_id}/inkey', video_id, note='Fetching inkey', query={
|
||||
'appId': app_id,
|
||||
'platformType': 'PC',
|
||||
'gcc': user_country,
|
||||
'locale': 'en_US',
|
||||
}, fatal=False)
|
||||
|
||||
vod_id = traverse_obj(player_info, ('postDetail', 'post', 'officialVideo', 'vodId'))
|
||||
|
||||
vod_data = self._download_archived_json(
|
||||
f'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{vod_id}', video_id, note='Fetching vod data', query={
|
||||
'key': inkey.get('inkey'),
|
||||
'pid': 'rmcPlayer_16692457559726800', # partially unix time and partially random. Fixed value used by archiveteam project
|
||||
'sid': '2024',
|
||||
'ver': '2.0',
|
||||
'devt': 'html5_pc',
|
||||
'doct': 'json',
|
||||
'ptc': 'https',
|
||||
'sptc': 'https',
|
||||
'cpt': 'vtt',
|
||||
'ctls': '%7B%22visible%22%3A%7B%22fullscreen%22%3Atrue%2C%22logo%22%3Afalse%2C%22playbackRate%22%3Afalse%2C%22scrap%22%3Afalse%2C%22playCount%22%3Atrue%2C%22commentCount%22%3Atrue%2C%22title%22%3Atrue%2C%22writer%22%3Atrue%2C%22expand%22%3Afalse%2C%22subtitles%22%3Atrue%2C%22thumbnails%22%3Atrue%2C%22quality%22%3Atrue%2C%22setting%22%3Atrue%2C%22script%22%3Afalse%2C%22logoDimmed%22%3Atrue%2C%22badge%22%3Atrue%2C%22seekingTime%22%3Atrue%2C%22muted%22%3Atrue%2C%22muteButton%22%3Afalse%2C%22viewerNotice%22%3Afalse%2C%22linkCount%22%3Afalse%2C%22createTime%22%3Afalse%2C%22thumbnail%22%3Atrue%7D%2C%22clicked%22%3A%7B%22expand%22%3Afalse%2C%22subtitles%22%3Afalse%7D%7D',
|
||||
'pv': '4.26.9',
|
||||
'dr': '1920x1080',
|
||||
'cpl': 'en_US',
|
||||
'lc': 'en_US',
|
||||
'adi': '%5B%7B%22type%22%3A%22pre%22%2C%22exposure%22%3Afalse%2C%22replayExposure%22%3Afalse%7D%5D',
|
||||
'adu': '%2F',
|
||||
'videoId': vod_id,
|
||||
'cc': user_country,
|
||||
})
|
||||
|
||||
formats = []
|
||||
|
||||
streams = traverse_obj(vod_data, ('streams', ...))
|
||||
if len(streams) > 1:
|
||||
self.report_warning('Multiple streams found. Only the first stream will be downloaded.')
|
||||
stream = streams[0]
|
||||
|
||||
max_stream = max(
|
||||
stream.get('videos') or [],
|
||||
key=lambda v: traverse_obj(v, ('bitrate', 'video'), default=0), default=None)
|
||||
if max_stream is not None:
|
||||
params = {arg.get('name'): arg.get('value') for arg in stream.get('keys', []) if arg.get('type') == 'param'}
|
||||
formats = self._extract_formats_from_m3u8(max_stream.get('source'), params, video_id) or []
|
||||
|
||||
# For parts of the project MP4 files were archived
|
||||
max_video = max(
|
||||
traverse_obj(vod_data, ('videos', 'list', ...)),
|
||||
key=lambda v: traverse_obj(v, ('bitrate', 'video'), default=0), default=None)
|
||||
if max_video is not None:
|
||||
video_url = self._WAYBACK_BASE_URL + max_video.get('source')
|
||||
urlh = self._request_webpage(HEADRequest(video_url), video_id, errnote=False,
|
||||
fatal=False, note='Check video availablity')
|
||||
if urlh:
|
||||
formats.append({'url': video_url})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
**traverse_obj(player_info, ('postDetail', 'post', {
|
||||
'title': ('officialVideo', 'title', {str}),
|
||||
'creator': ('author', 'nickname', {str}),
|
||||
'channel': ('channel', 'channelName', {str}),
|
||||
'channel_id': ('channel', 'channelCode', {str}),
|
||||
'duration': ('officialVideo', 'playTime', {int_or_none}),
|
||||
'view_count': ('officialVideo', 'playCount', {int_or_none}),
|
||||
'like_count': ('officialVideo', 'likeCount', {int_or_none}),
|
||||
'comment_count': ('officialVideo', 'commentCount', {int_or_none}),
|
||||
'timestamp': ('officialVideo', 'createdAt', {lambda x: int_or_none(x, scale=1000)}),
|
||||
'release_timestamp': ('officialVideo', 'willStartAt', {lambda x: int_or_none(x, scale=1000)}),
|
||||
})),
|
||||
**traverse_obj(vod_data, ('meta', {
|
||||
'uploader_id': ('user', 'id', {str}),
|
||||
'uploader': ('user', 'name', {str}),
|
||||
'uploader_url': ('user', 'url', {url_or_none}),
|
||||
'thumbnail': ('cover', 'source', {url_or_none}),
|
||||
}), expected_type=lambda x: x or None),
|
||||
**NaverBaseIE.process_subtitles(vod_data, lambda x: [self._WAYBACK_BASE_URL + x]),
|
||||
}
|
||||
|
||||
@@ -169,7 +169,7 @@ class ArteTVIE(ArteTVBaseIE):
|
||||
)))
|
||||
|
||||
short_label = traverse_obj(stream_version, 'shortLabel', expected_type=str, default='?')
|
||||
if stream['protocol'].startswith('HLS'):
|
||||
if 'HLS' in stream['protocol']:
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
stream['url'], video_id=video_id, ext='mp4', m3u8_id=stream_version_code, fatal=False)
|
||||
for fmt in fmts:
|
||||
|
||||
@@ -1,196 +0,0 @@
|
||||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .kaltura import KalturaIE
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
OnDemandPagedList,
|
||||
parse_age_limit,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class AsianCrushBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?(?P<host>(?:(?:asiancrush|yuyutv|midnightpulp)\.com|(?:cocoro|retrocrush)\.tv))'
|
||||
_KALTURA_KEYS = [
|
||||
'video_url', 'progressive_url', 'download_url', 'thumbnail_url',
|
||||
'widescreen_thumbnail_url', 'screencap_widescreen',
|
||||
]
|
||||
_API_SUFFIX = {'retrocrush.tv': '-ott'}
|
||||
|
||||
def _call_api(self, host, endpoint, video_id, query, resource):
|
||||
return self._download_json(
|
||||
'https://api%s.%s/%s' % (self._API_SUFFIX.get(host, ''), host, endpoint), video_id,
|
||||
'Downloading %s JSON metadata' % resource, query=query,
|
||||
headers=self.geo_verification_headers())['objects']
|
||||
|
||||
def _download_object_data(self, host, object_id, resource):
|
||||
return self._call_api(
|
||||
host, 'search', object_id, {'id': object_id}, resource)[0]
|
||||
|
||||
def _get_object_description(self, obj):
|
||||
return strip_or_none(obj.get('long_description') or obj.get('short_description'))
|
||||
|
||||
def _parse_video_data(self, video):
|
||||
title = video['name']
|
||||
|
||||
entry_id, partner_id = [None] * 2
|
||||
for k in self._KALTURA_KEYS:
|
||||
k_url = video.get(k)
|
||||
if k_url:
|
||||
mobj = re.search(r'/p/(\d+)/.+?/entryId/([^/]+)/', k_url)
|
||||
if mobj:
|
||||
partner_id, entry_id = mobj.groups()
|
||||
break
|
||||
|
||||
meta_categories = try_get(video, lambda x: x['meta']['categories'], list) or []
|
||||
categories = list(filter(None, [c.get('name') for c in meta_categories]))
|
||||
|
||||
show_info = video.get('show_info') or {}
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'kaltura:%s:%s' % (partner_id, entry_id),
|
||||
'ie_key': KalturaIE.ie_key(),
|
||||
'id': entry_id,
|
||||
'title': title,
|
||||
'description': self._get_object_description(video),
|
||||
'age_limit': parse_age_limit(video.get('mpaa_rating') or video.get('tv_rating')),
|
||||
'categories': categories,
|
||||
'series': show_info.get('show_name'),
|
||||
'season_number': int_or_none(show_info.get('season_num')),
|
||||
'season_id': show_info.get('season_id'),
|
||||
'episode_number': int_or_none(show_info.get('episode_num')),
|
||||
}
|
||||
|
||||
|
||||
class AsianCrushIE(AsianCrushBaseIE):
|
||||
_VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' % AsianCrushBaseIE._VALID_URL_BASE
|
||||
_TESTS = [{
|
||||
'url': 'https://www.asiancrush.com/video/004289v/women-who-flirt',
|
||||
'md5': 'c3b740e48d0ba002a42c0b72857beae6',
|
||||
'info_dict': {
|
||||
'id': '1_y4tmjm5r',
|
||||
'ext': 'mp4',
|
||||
'title': 'Women Who Flirt',
|
||||
'description': 'md5:b65c7e0ae03a85585476a62a186f924c',
|
||||
'timestamp': 1496936429,
|
||||
'upload_date': '20170608',
|
||||
'uploader_id': 'craig@crifkin.com',
|
||||
'age_limit': 13,
|
||||
'categories': 'count:5',
|
||||
'duration': 5812,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.yuyutv.com/video/013886v/the-act-of-killing/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.yuyutv.com/video/peep-show/013922v-warring-factions/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.midnightpulp.com/video/010400v/drifters/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.midnightpulp.com/video/mononoke/016378v-zashikiwarashi-part-1/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cocoro.tv/video/the-wonderful-wizard-of-oz/008878v-the-wonderful-wizard-of-oz-ep01/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.retrocrush.tv/video/true-tears/012328v-i...gave-away-my-tears',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
host, video_id = self._match_valid_url(url).groups()
|
||||
|
||||
if host == 'cocoro.tv':
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
embed_vars = self._parse_json(self._search_regex(
|
||||
r'iEmbedVars\s*=\s*({.+?})', webpage, 'embed vars',
|
||||
default='{}'), video_id, fatal=False) or {}
|
||||
video_id = embed_vars.get('entry_id') or video_id
|
||||
|
||||
video = self._download_object_data(host, video_id, 'video')
|
||||
return self._parse_video_data(video)
|
||||
|
||||
|
||||
class AsianCrushPlaylistIE(AsianCrushBaseIE):
|
||||
_VALID_URL = r'%s/series/0+(?P<id>\d+)s\b' % AsianCrushBaseIE._VALID_URL_BASE
|
||||
_TESTS = [{
|
||||
'url': 'https://www.asiancrush.com/series/006447s/fruity-samurai',
|
||||
'info_dict': {
|
||||
'id': '6447',
|
||||
'title': 'Fruity Samurai',
|
||||
'description': 'md5:7535174487e4a202d3872a7fc8f2f154',
|
||||
},
|
||||
'playlist_count': 13,
|
||||
}, {
|
||||
'url': 'https://www.yuyutv.com/series/013920s/peep-show/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.midnightpulp.com/series/016375s/mononoke/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cocoro.tv/series/008549s/the-wonderful-wizard-of-oz/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.retrocrush.tv/series/012355s/true-tears',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_PAGE_SIZE = 1000000000
|
||||
|
||||
def _fetch_page(self, domain, parent_id, page):
|
||||
videos = self._call_api(
|
||||
domain, 'getreferencedobjects', parent_id, {
|
||||
'max': self._PAGE_SIZE,
|
||||
'object_type': 'video',
|
||||
'parent_id': parent_id,
|
||||
'start': page * self._PAGE_SIZE,
|
||||
}, 'page %d' % (page + 1))
|
||||
for video in videos:
|
||||
yield self._parse_video_data(video)
|
||||
|
||||
def _real_extract(self, url):
|
||||
host, playlist_id = self._match_valid_url(url).groups()
|
||||
|
||||
if host == 'cocoro.tv':
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
entries = []
|
||||
|
||||
for mobj in re.finditer(
|
||||
r'<a[^>]+href=(["\'])(?P<url>%s.*?)\1[^>]*>' % AsianCrushIE._VALID_URL,
|
||||
webpage):
|
||||
attrs = extract_attributes(mobj.group(0))
|
||||
if attrs.get('class') == 'clearfix':
|
||||
entries.append(self.url_result(
|
||||
mobj.group('url'), ie=AsianCrushIE.ie_key()))
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<h1\b[^>]\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
|
||||
'title', default=None) or self._og_search_title(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:title', webpage, 'title',
|
||||
default=None) or self._html_extract_title(webpage)
|
||||
if title:
|
||||
title = re.sub(r'\s*\|\s*.+?$', '', title)
|
||||
|
||||
description = self._og_search_description(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:description', webpage, 'description', fatal=False)
|
||||
else:
|
||||
show = self._download_object_data(host, playlist_id, 'show')
|
||||
title = show.get('name')
|
||||
description = self._get_object_description(show)
|
||||
entries = OnDemandPagedList(
|
||||
functools.partial(self._fetch_page, host, playlist_id),
|
||||
self._PAGE_SIZE)
|
||||
|
||||
return self.playlist_result(entries, playlist_id, title, description)
|
||||
@@ -1,5 +1,5 @@
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
@@ -34,8 +34,8 @@ class AtresPlayerIE(InfoExtractor):
|
||||
_API_BASE = 'https://api.atresplayer.com/'
|
||||
|
||||
def _handle_error(self, e, code):
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == code:
|
||||
error = self._parse_json(e.cause.read(), None)
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == code:
|
||||
error = self._parse_json(e.cause.response.read(), None)
|
||||
if error.get('error') == 'required_registered':
|
||||
self.raise_login_required()
|
||||
raise ExtractorError(error['error_description'], expected=True)
|
||||
|
||||
@@ -12,7 +12,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
|
||||
|
||||
def _aws_execute_api(self, aws_dict, video_id, query=None):
|
||||
query = query or {}
|
||||
amz_date = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
|
||||
amz_date = datetime.datetime.now(datetime.timezone.utc).strftime('%Y%m%dT%H%M%SZ')
|
||||
date = amz_date[:8]
|
||||
headers = {
|
||||
'Accept': 'application/json',
|
||||
|
||||
87
yt_dlp/extractor/axs.py
Normal file
87
yt_dlp/extractor/axs.py
Normal file
@@ -0,0 +1,87 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
js_to_json,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class AxsIE(InfoExtractor):
|
||||
IE_NAME = 'axs.tv'
|
||||
_VALID_URL = r'https?://(?:www\.)?axs\.tv/(?:channel/(?:[^/?#]+/)+)?video/(?P<id>[^/?#]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.axs.tv/video/5f4dc776b70e4f1c194f22ef/',
|
||||
'md5': '8d97736ae8e50c64df528e5e676778cf',
|
||||
'info_dict': {
|
||||
'id': '5f4dc776b70e4f1c194f22ef',
|
||||
'title': 'Small Town',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:e314d28bfaa227a4d7ec965fae19997f',
|
||||
'upload_date': '20230602',
|
||||
'timestamp': 1685729564,
|
||||
'duration': 1284.216,
|
||||
'series': 'Rock & Roll Road Trip with Sammy Hagar',
|
||||
'season': 2,
|
||||
'episode': '3',
|
||||
'thumbnail': 'https://images.dotstudiopro.com/5f4e9d330a0c3b295a7e8394',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.axs.tv/channel/rock-star-interview/video/daryl-hall',
|
||||
'md5': '300ae795cd8f9984652c0949734ffbdc',
|
||||
'info_dict': {
|
||||
'id': '5f488148b70e4f392572977c',
|
||||
'display_id': 'daryl-hall',
|
||||
'title': 'Daryl Hall',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:e54ecaa0f4b5683fc9259e9e4b196628',
|
||||
'upload_date': '20230214',
|
||||
'timestamp': 1676403615,
|
||||
'duration': 2570.668,
|
||||
'series': 'The Big Interview with Dan Rather',
|
||||
'season': 3,
|
||||
'episode': '5',
|
||||
'thumbnail': 'https://images.dotstudiopro.com/5f4d1901f340b50d937cec32',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
webpage_json_data = self._search_json(
|
||||
r'mountObj\s*=', webpage, 'video ID data', display_id,
|
||||
transform_source=js_to_json)
|
||||
video_id = webpage_json_data['video_id']
|
||||
company_id = webpage_json_data['company_id']
|
||||
|
||||
meta = self._download_json(
|
||||
f'https://api.myspotlight.tv/dotplayer/video/{company_id}/{video_id}',
|
||||
video_id, query={'device_type': 'desktop_web'})['video']
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
meta['video_m3u8'], video_id, 'mp4', m3u8_id='hls')
|
||||
|
||||
subtitles = {}
|
||||
for cc in traverse_obj(meta, ('closeCaption', lambda _, v: url_or_none(v['srtPath']))):
|
||||
subtitles.setdefault(cc.get('srtShortLang') or 'en', []).append(
|
||||
{'ext': cc.get('srtExt'), 'url': cc['srtPath']})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'formats': formats,
|
||||
**traverse_obj(meta, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'series': ('seriestitle', {str}),
|
||||
'season': ('season', {int}),
|
||||
'episode': ('episode', {str}),
|
||||
'duration': ('duration', {float_or_none}),
|
||||
'timestamp': ('updated_at', {parse_iso8601}),
|
||||
'thumbnail': ('thumb', {url_or_none}),
|
||||
}),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
@@ -31,7 +31,7 @@ class BanByeBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class BanByeIE(BanByeBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?banbye.com/(?:en/)?watch/(?P<id>\w+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?banbye.com/(?:en/)?watch/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://banbye.com/watch/v_ytfmvkVYLE8T',
|
||||
'md5': '2f4ea15c5ca259a73d909b2cfd558eb5',
|
||||
@@ -59,7 +59,27 @@ class BanByeIE(BanByeBaseIE):
|
||||
'title': 'Krzysztof Karoń',
|
||||
'id': 'p_Ld82N6gBw_OJ',
|
||||
},
|
||||
'playlist_count': 9,
|
||||
'playlist_mincount': 9,
|
||||
}, {
|
||||
'url': 'https://banbye.com/watch/v_kb6_o1Kyq-CD',
|
||||
'info_dict': {
|
||||
'id': 'v_kb6_o1Kyq-CD',
|
||||
'ext': 'mp4',
|
||||
'title': 'Co tak naprawdę dzieje się we Francji?! Czy Warszawa a potem cała Polska będzie drugim Paryżem?!🤔🇵🇱',
|
||||
'description': 'md5:82be4c0e13eae8ea1ca8b9f2e07226a8',
|
||||
'uploader': 'Marcin Rola - MOIM ZDANIEM!🇵🇱',
|
||||
'channel_id': 'ch_QgWnHvDG2fo5',
|
||||
'channel_url': 'https://banbye.com/channel/ch_QgWnHvDG2fo5',
|
||||
'duration': 597,
|
||||
'timestamp': 1688642656,
|
||||
'upload_date': '20230706',
|
||||
'thumbnail': 'https://cdn.banbye.com/video/v_kb6_o1Kyq-CD/96.webp',
|
||||
'tags': ['Paryż', 'Francja', 'Polska', 'Imigranci', 'Morawiecki', 'Tusk'],
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -2,11 +2,11 @@ import functools
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
import urllib.error
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError, compat_str, compat_urlparse
|
||||
from ..compat import compat_str, compat_urlparse
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
@@ -15,11 +15,13 @@ from ..utils import (
|
||||
float_or_none,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
@@ -41,7 +43,6 @@ class BBCCoUkIE(InfoExtractor):
|
||||
iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
|
||||
music/(?:clips|audiovideo/popular)[/#]|
|
||||
radio/player/|
|
||||
sounds/play/|
|
||||
events/[^/]+/play/[^/]+/
|
||||
)
|
||||
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
|
||||
@@ -218,20 +219,6 @@ class BBCCoUkIE(InfoExtractor):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bbc.co.uk/sounds/play/m0007jzb',
|
||||
'note': 'Audio',
|
||||
'info_dict': {
|
||||
'id': 'm0007jz9',
|
||||
'ext': 'mp4',
|
||||
'title': 'BBC Proms, 2019, Prom 34: West–Eastern Divan Orchestra',
|
||||
'description': "Live BBC Proms. West–Eastern Divan Orchestra with Daniel Barenboim and Martha Argerich.",
|
||||
'duration': 9840,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
|
||||
'only_matching': True,
|
||||
@@ -277,7 +264,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
post_url, None, 'Logging in', data=urlencode_postdata(login_form),
|
||||
headers={'Referer': self._LOGIN_URL})
|
||||
|
||||
if self._LOGIN_URL in urlh.geturl():
|
||||
if self._LOGIN_URL in urlh.url:
|
||||
error = clean_html(get_element_by_class('form-message', response))
|
||||
if error:
|
||||
raise ExtractorError(
|
||||
@@ -388,8 +375,8 @@ class BBCCoUkIE(InfoExtractor):
|
||||
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_id, fatal=False)
|
||||
except ExtractorError as e:
|
||||
if not (isinstance(e.exc_info[1], urllib.error.HTTPError)
|
||||
and e.exc_info[1].code in (403, 404)):
|
||||
if not (isinstance(e.exc_info[1], HTTPError)
|
||||
and e.exc_info[1].status in (403, 404)):
|
||||
raise
|
||||
fmts = []
|
||||
formats.extend(fmts)
|
||||
@@ -472,7 +459,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
|
||||
return programme_id, title, description, duration, formats, subtitles
|
||||
except ExtractorError as ee:
|
||||
if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
|
||||
if not (isinstance(ee.cause, HTTPError) and ee.cause.status == 404):
|
||||
raise
|
||||
|
||||
# fallback to legacy playlist
|
||||
@@ -844,6 +831,20 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
'upload_date': '20190604',
|
||||
'categories': ['Psychology'],
|
||||
},
|
||||
}, {
|
||||
# BBC Sounds
|
||||
'url': 'https://www.bbc.co.uk/sounds/play/m001q78b',
|
||||
'info_dict': {
|
||||
'id': 'm001q789',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Night Tracks Mix - Music for the darkling hour',
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0c00hym.jpg',
|
||||
'chapters': 'count:8',
|
||||
'description': 'md5:815fb51cbdaa270040aab8145b3f1d67',
|
||||
'uploader': 'Radio 3',
|
||||
'duration': 1800,
|
||||
'uploader_id': 'bbc_radio_three',
|
||||
},
|
||||
}, { # onion routes
|
||||
'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576',
|
||||
'only_matching': True,
|
||||
@@ -983,7 +984,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
# Some playlist URL may fail with 500, at the same time
|
||||
# the other one may work fine (e.g.
|
||||
# http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 500:
|
||||
continue
|
||||
raise
|
||||
if entry:
|
||||
@@ -1128,6 +1129,13 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
'uploader_id': network.get('id'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'chapters': traverse_obj(preload_state, (
|
||||
'tracklist', 'tracks', lambda _, v: float_or_none(v['offset']['start']), {
|
||||
'title': ('titles', {lambda x: join_nonempty(
|
||||
'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
|
||||
'start_time': ('offset', 'start', {float_or_none}),
|
||||
'end_time': ('offset', 'end', {float_or_none}),
|
||||
})) or None,
|
||||
}
|
||||
|
||||
bbc3_config = self._parse_json(
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
@@ -8,7 +9,8 @@ from ..utils import (
|
||||
class BildIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bild\.de/(?:[^/]+/)+(?P<display_id>[^/]+)-(?P<id>\d+)(?:,auto=true)?\.bild\.html'
|
||||
IE_DESC = 'Bild.de'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'note': 'static MP4 only',
|
||||
'url': 'http://www.bild.de/video/clip/apple-ipad-air/das-koennen-die-neuen-ipads-38184146.bild.html',
|
||||
'md5': 'dd495cbd99f2413502a1713a1156ac8a',
|
||||
'info_dict': {
|
||||
@@ -19,7 +21,19 @@ class BildIE(InfoExtractor):
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 196,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'note': 'static MP4 and HLS',
|
||||
'url': 'https://www.bild.de/video/clip/news-ausland/deftiger-abgang-vom-10m-turm-bademeister-sorgt-fuer-skandal-85158620.bild.html',
|
||||
'md5': 'fb0ed4f09c495d4ba7ce2eee0bb90de1',
|
||||
'info_dict': {
|
||||
'id': '85158620',
|
||||
'ext': 'mp4',
|
||||
'title': 'Der Sprungturm-Skandal',
|
||||
'description': 'md5:709b543c24dc31bbbffee73bccda34ad',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 69,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -27,11 +41,23 @@ class BildIE(InfoExtractor):
|
||||
video_data = self._download_json(
|
||||
url.split('.bild.html')[0] + ',view=json.bild.html', video_id)
|
||||
|
||||
formats = []
|
||||
for src in traverse_obj(video_data, ('clipList', 0, 'srces', lambda _, v: v['src'])):
|
||||
src_type = src.get('type')
|
||||
if src_type == 'application/x-mpegURL':
|
||||
formats.extend(
|
||||
self._extract_m3u8_formats(
|
||||
src['src'], video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
elif src_type == 'video/mp4':
|
||||
formats.append({'url': src['src'], 'format_id': 'http-mp4'})
|
||||
else:
|
||||
self.report_warning(f'Skipping unsupported format type: "{src_type}"')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': unescapeHTML(video_data['title']).strip(),
|
||||
'description': unescapeHTML(video_data.get('description')),
|
||||
'url': video_data['clipList'][0]['srces'][0]['src'],
|
||||
'formats': formats,
|
||||
'thumbnail': video_data.get('poster'),
|
||||
'duration': int_or_none(video_data.get('durationSec')),
|
||||
}
|
||||
|
||||
@@ -3,21 +3,24 @@ import functools
|
||||
import hashlib
|
||||
import itertools
|
||||
import math
|
||||
import re
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor, SearchInfoExtractor
|
||||
from ..dependencies import Cryptodome
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
GeoRestrictedError,
|
||||
InAdvancePagedList,
|
||||
OnDemandPagedList,
|
||||
bool_or_none,
|
||||
filter_dict,
|
||||
float_or_none,
|
||||
format_field,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
make_archive_id,
|
||||
merge_dicts,
|
||||
mimetype2ext,
|
||||
@@ -33,27 +36,31 @@ from ..utils import (
|
||||
unsmuggle_url,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
variadic,
|
||||
)
|
||||
|
||||
|
||||
class BilibiliBaseIE(InfoExtractor):
|
||||
_FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
|
||||
|
||||
def extract_formats(self, play_info):
|
||||
format_names = {
|
||||
r['quality']: traverse_obj(r, 'new_description', 'display_desc')
|
||||
for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
|
||||
}
|
||||
|
||||
audios = traverse_obj(play_info, ('dash', 'audio', ...))
|
||||
audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict}))
|
||||
flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
|
||||
if flac_audio:
|
||||
audios.append(flac_audio)
|
||||
formats = [{
|
||||
'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
|
||||
'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
|
||||
'acodec': audio.get('codecs'),
|
||||
'acodec': traverse_obj(audio, ('codecs', {str.lower})),
|
||||
'vcodec': 'none',
|
||||
'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
|
||||
'filesize': int_or_none(audio.get('size'))
|
||||
'filesize': int_or_none(audio.get('size')),
|
||||
'format_id': str_or_none(audio.get('id')),
|
||||
} for audio in audios]
|
||||
|
||||
formats.extend({
|
||||
@@ -64,9 +71,13 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
'height': int_or_none(video.get('height')),
|
||||
'vcodec': video.get('codecs'),
|
||||
'acodec': 'none' if audios else None,
|
||||
'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))),
|
||||
'tbr': float_or_none(video.get('bandwidth'), scale=1000),
|
||||
'filesize': int_or_none(video.get('size')),
|
||||
'quality': int_or_none(video.get('id')),
|
||||
'format_id': traverse_obj(
|
||||
video, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}, 1),
|
||||
('id', {str_or_none}), get_all=False),
|
||||
'format': format_names.get(video.get('id')),
|
||||
} for video in traverse_obj(play_info, ('dash', 'video', ...)))
|
||||
|
||||
@@ -135,9 +146,20 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))):
|
||||
yield from children
|
||||
|
||||
def _get_episodes_from_season(self, ss_id, url):
|
||||
season_info = self._download_json(
|
||||
'https://api.bilibili.com/pgc/web/season/section', ss_id,
|
||||
note='Downloading season info', query={'season_id': ss_id},
|
||||
headers={'Referer': url, **self.geo_verification_headers()})
|
||||
|
||||
for entry in traverse_obj(season_info, (
|
||||
'result', 'main_section', 'episodes',
|
||||
lambda _, v: url_or_none(v['share_url']) and v['id'])):
|
||||
yield self.url_result(entry['share_url'], BiliBiliBangumiIE, f'ep{entry["id"]}')
|
||||
|
||||
|
||||
class BiliBiliIE(BilibiliBaseIE):
|
||||
_VALID_URL = r'https?://www\.bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/video/BV13x41117TL',
|
||||
@@ -233,7 +255,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
|
||||
'duration': 313.557,
|
||||
'upload_date': '20220709',
|
||||
'uploader': '小夫Tech',
|
||||
'uploader': '小夫太渴',
|
||||
'timestamp': 1657347907,
|
||||
'uploader_id': '1326814124',
|
||||
'comment_count': int,
|
||||
@@ -403,77 +425,94 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
|
||||
|
||||
class BiliBiliBangumiIE(BilibiliBaseIE):
|
||||
_VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/(?P<id>(?:ss|ep)\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/(?P<id>ep\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/bangumi/play/ss897',
|
||||
'url': 'https://www.bilibili.com/bangumi/play/ep267851',
|
||||
'info_dict': {
|
||||
'id': 'ss897',
|
||||
'id': '267851',
|
||||
'ext': 'mp4',
|
||||
'series': '神的记事本',
|
||||
'season': '神的记事本',
|
||||
'season_id': 897,
|
||||
'series': '鬼灭之刃',
|
||||
'series_id': '4358',
|
||||
'season': '鬼灭之刃',
|
||||
'season_id': '26801',
|
||||
'season_number': 1,
|
||||
'episode': '你与旅行包',
|
||||
'episode_number': 2,
|
||||
'title': '神的记事本:第2话 你与旅行包',
|
||||
'duration': 1428.487,
|
||||
'timestamp': 1310809380,
|
||||
'upload_date': '20110716',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'episode': '残酷',
|
||||
'episode_id': '267851',
|
||||
'episode_number': 1,
|
||||
'title': '1 残酷',
|
||||
'duration': 1425.256,
|
||||
'timestamp': 1554566400,
|
||||
'upload_date': '20190406',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/bangumi/play/ep508406',
|
||||
'only_matching': True,
|
||||
'skip': 'According to the copyright owner\'s request, you may only watch the video after you are premium member.'
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
episode_id = video_id[2:]
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if '您所在的地区无法观看本片' in webpage:
|
||||
raise GeoRestrictedError('This video is restricted')
|
||||
elif ('开通大会员观看' in webpage and '__playinfo__' not in webpage
|
||||
or '正在观看预览,大会员免费看全片' in webpage):
|
||||
elif '正在观看预览,大会员免费看全片' in webpage:
|
||||
self.raise_login_required('This video is for premium members only')
|
||||
|
||||
play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']
|
||||
headers = {'Referer': url, **self.geo_verification_headers()}
|
||||
play_info = self._download_json(
|
||||
'https://api.bilibili.com/pgc/player/web/v2/playurl', video_id,
|
||||
'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
|
||||
headers=headers)
|
||||
premium_only = play_info.get('code') == -10403
|
||||
play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}
|
||||
|
||||
formats = self.extract_formats(play_info)
|
||||
if (not formats and '成为大会员抢先看' in webpage
|
||||
and play_info.get('durl') and not play_info.get('dash')):
|
||||
if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
|
||||
self.raise_login_required('This video is for premium members only')
|
||||
|
||||
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
|
||||
bangumi_info = self._download_json(
|
||||
'https://api.bilibili.com/pgc/view/web/season', video_id, 'Get episode details',
|
||||
query={'ep_id': episode_id}, headers=headers)['result']
|
||||
|
||||
season_id = traverse_obj(initial_state, ('mediaInfo', 'season_id'))
|
||||
episode_number, episode_info = next((
|
||||
(idx, ep) for idx, ep in enumerate(traverse_obj(
|
||||
bangumi_info, ('episodes', ..., {dict})), 1)
|
||||
if str_or_none(ep.get('id')) == episode_id), (1, {}))
|
||||
|
||||
season_id = bangumi_info.get('season_id')
|
||||
season_number = season_id and next((
|
||||
idx + 1 for idx, e in enumerate(
|
||||
traverse_obj(initial_state, ('mediaInfo', 'seasons', ...)))
|
||||
traverse_obj(bangumi_info, ('seasons', ...)))
|
||||
if e.get('season_id') == season_id
|
||||
), None)
|
||||
|
||||
aid = episode_info.get('aid')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': traverse_obj(initial_state, 'h1Title'),
|
||||
'episode': traverse_obj(initial_state, ('epInfo', 'long_title')),
|
||||
'episode_number': int_or_none(traverse_obj(initial_state, ('epInfo', 'title'))),
|
||||
'series': traverse_obj(initial_state, ('mediaInfo', 'series')),
|
||||
'season': traverse_obj(initial_state, ('mediaInfo', 'season_title')),
|
||||
'season_id': season_id,
|
||||
**traverse_obj(bangumi_info, {
|
||||
'series': ('series', 'series_title', {str}),
|
||||
'series_id': ('series', 'series_id', {str_or_none}),
|
||||
'thumbnail': ('square_cover', {url_or_none}),
|
||||
}),
|
||||
'title': join_nonempty('title', 'long_title', delim=' ', from_dict=episode_info),
|
||||
'episode': episode_info.get('long_title'),
|
||||
'episode_id': episode_id,
|
||||
'episode_number': int_or_none(episode_info.get('title')) or episode_number,
|
||||
'season_id': str_or_none(season_id),
|
||||
'season_number': season_number,
|
||||
'thumbnail': traverse_obj(initial_state, ('epInfo', 'cover')),
|
||||
'timestamp': traverse_obj(initial_state, ('epInfo', 'pub_time')),
|
||||
'timestamp': int_or_none(episode_info.get('pub_time')),
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
'subtitles': self.extract_subtitles(
|
||||
video_id, initial_state, traverse_obj(initial_state, ('epInfo', 'cid'))),
|
||||
'__post_extractor': self.extract_comments(traverse_obj(initial_state, ('epInfo', 'aid'))),
|
||||
'http_headers': {'Referer': url, **self.geo_verification_headers()},
|
||||
'subtitles': self.extract_subtitles(video_id, aid, episode_info.get('cid')),
|
||||
'__post_extractor': self.extract_comments(aid),
|
||||
'http_headers': headers,
|
||||
}
|
||||
|
||||
|
||||
class BiliBiliBangumiMediaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.bilibili\.com/bangumi/media/md(?P<id>\d+)'
|
||||
class BiliBiliBangumiMediaIE(BilibiliBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/bangumi/media/md24097891',
|
||||
'info_dict': {
|
||||
@@ -485,16 +524,26 @@ class BiliBiliBangumiMediaIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
media_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, media_id)
|
||||
ss_id = self._search_json(
|
||||
r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)['mediaInfo']['season_id']
|
||||
|
||||
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)
|
||||
episode_list = self._download_json(
|
||||
'https://api.bilibili.com/pgc/web/season/section', media_id,
|
||||
query={'season_id': initial_state['mediaInfo']['season_id']},
|
||||
note='Downloading season info')['result']['main_section']['episodes']
|
||||
return self.playlist_result(self._get_episodes_from_season(ss_id, url), media_id)
|
||||
|
||||
return self.playlist_result((
|
||||
self.url_result(entry['share_url'], BiliBiliBangumiIE, entry['aid'])
|
||||
for entry in episode_list), media_id)
|
||||
|
||||
class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
|
||||
_VALID_URL = r'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/bangumi/play/ss26801',
|
||||
'info_dict': {
|
||||
'id': '26801'
|
||||
},
|
||||
'playlist_mincount': 26
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
ss_id = self._match_id(url)
|
||||
|
||||
return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id)
|
||||
|
||||
|
||||
class BilibiliSpaceBaseIE(InfoExtractor):
|
||||
@@ -575,7 +624,7 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
|
||||
response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
|
||||
playlist_id, note=f'Downloading page {page_idx}', query=query)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 412:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 412:
|
||||
raise ExtractorError(
|
||||
'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
|
||||
raise
|
||||
@@ -633,13 +682,35 @@ class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
|
||||
return self.playlist_result(paged_list, playlist_id)
|
||||
|
||||
|
||||
class BilibiliSpacePlaylistIE(BilibiliSpaceBaseIE):
|
||||
_VALID_URL = r'https?://space.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail\?sid=(?P<sid>\d+)'
|
||||
class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
|
||||
def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
|
||||
for bvid in traverse_obj(page_data, (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})):
|
||||
yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)
|
||||
|
||||
def _get_uploader(self, uid, playlist_id):
|
||||
webpage = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
|
||||
return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', webpage, 'uploader', fatal=False)
|
||||
|
||||
def _extract_playlist(self, fetch_page, get_metadata, get_entries):
|
||||
metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
|
||||
metadata.pop('page_count', None)
|
||||
metadata.pop('page_size', None)
|
||||
return metadata, page_list
|
||||
|
||||
|
||||
class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
|
||||
_VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
|
||||
'info_dict': {
|
||||
'id': '2142762_57445',
|
||||
'title': '《底特律 变人》'
|
||||
'title': '【完结】《底特律 变人》全结局流程解说',
|
||||
'description': '',
|
||||
'uploader': '老戴在此',
|
||||
'uploader_id': '2142762',
|
||||
'timestamp': int,
|
||||
'upload_date': str,
|
||||
'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
|
||||
},
|
||||
'playlist_mincount': 31,
|
||||
}]
|
||||
@@ -660,22 +731,251 @@ class BilibiliSpacePlaylistIE(BilibiliSpaceBaseIE):
|
||||
return {
|
||||
'page_count': math.ceil(entry_count / page_size),
|
||||
'page_size': page_size,
|
||||
'title': traverse_obj(page_data, ('meta', 'name'))
|
||||
'uploader': self._get_uploader(mid, playlist_id),
|
||||
**traverse_obj(page_data, {
|
||||
'title': ('meta', 'name', {str}),
|
||||
'description': ('meta', 'description', {str}),
|
||||
'uploader_id': ('meta', 'mid', {str_or_none}),
|
||||
'timestamp': ('meta', 'ptime', {int_or_none}),
|
||||
'thumbnail': ('meta', 'cover', {url_or_none}),
|
||||
})
|
||||
}
|
||||
|
||||
def get_entries(page_data):
|
||||
for entry in page_data.get('archives', []):
|
||||
yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}',
|
||||
BiliBiliIE, entry['bvid'])
|
||||
return self._get_entries(page_data, 'archives')
|
||||
|
||||
metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
|
||||
return self.playlist_result(paged_list, playlist_id, metadata['title'])
|
||||
return self.playlist_result(paged_list, playlist_id, **metadata)
|
||||
|
||||
|
||||
class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
|
||||
_VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
|
||||
'info_dict': {
|
||||
'id': '1958703906_547718',
|
||||
'title': '直播回放',
|
||||
'description': '直播回放',
|
||||
'uploader': '靡烟miya',
|
||||
'uploader_id': '1958703906',
|
||||
'timestamp': 1637985853,
|
||||
'upload_date': '20211127',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
},
|
||||
'playlist_mincount': 513,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mid, sid = self._match_valid_url(url).group('mid', 'sid')
|
||||
playlist_id = f'{mid}_{sid}'
|
||||
playlist_meta = traverse_obj(self._download_json(
|
||||
f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False
|
||||
), {
|
||||
'title': ('data', 'meta', 'name', {str}),
|
||||
'description': ('data', 'meta', 'description', {str}),
|
||||
'uploader_id': ('data', 'meta', 'mid', {str_or_none}),
|
||||
'timestamp': ('data', 'meta', 'ctime', {int_or_none}),
|
||||
'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}),
|
||||
})
|
||||
|
||||
def fetch_page(page_idx):
|
||||
return self._download_json(
|
||||
'https://api.bilibili.com/x/series/archives',
|
||||
playlist_id, note=f'Downloading page {page_idx}',
|
||||
query={'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30})['data']
|
||||
|
||||
def get_metadata(page_data):
|
||||
page_size = page_data['page']['size']
|
||||
entry_count = page_data['page']['total']
|
||||
return {
|
||||
'page_count': math.ceil(entry_count / page_size),
|
||||
'page_size': page_size,
|
||||
'uploader': self._get_uploader(mid, playlist_id),
|
||||
**playlist_meta
|
||||
}
|
||||
|
||||
def get_entries(page_data):
|
||||
return self._get_entries(page_data, 'archives')
|
||||
|
||||
metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
|
||||
return self.playlist_result(paged_list, playlist_id, **metadata)
|
||||
|
||||
|
||||
class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
|
||||
_VALID_URL = r'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
|
||||
'info_dict': {
|
||||
'id': '1103407912',
|
||||
'title': '【V2】(旧)',
|
||||
'description': '',
|
||||
'uploader': '晓月春日',
|
||||
'uploader_id': '84912',
|
||||
'timestamp': 1604905176,
|
||||
'upload_date': '20201109',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
'playlist_mincount': 22,
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
fid = self._match_id(url)
|
||||
|
||||
list_info = self._download_json(
|
||||
f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
|
||||
fid, note='Downloading favlist metadata')
|
||||
if list_info['code'] == -403:
|
||||
self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')
|
||||
|
||||
entries = self._get_entries(self._download_json(
|
||||
f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
|
||||
fid, note='Download favlist entries'), 'data')
|
||||
|
||||
return self.playlist_result(entries, fid, **traverse_obj(list_info, ('data', 'info', {
|
||||
'title': ('title', {str}),
|
||||
'description': ('intro', {str}),
|
||||
'uploader': ('upper', 'name', {str}),
|
||||
'uploader_id': ('upper', 'mid', {str_or_none}),
|
||||
'timestamp': ('ctime', {int_or_none}),
|
||||
'modified_timestamp': ('mtime', {int_or_none}),
|
||||
'thumbnail': ('cover', {url_or_none}),
|
||||
'view_count': ('cnt_info', 'play', {int_or_none}),
|
||||
'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
|
||||
})))
|
||||
|
||||
|
||||
class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/watchlater/#/list',
|
||||
'info_dict': {'id': 'watchlater'},
|
||||
'playlist_mincount': 0,
|
||||
'skip': 'login required',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
list_id = getattr(self._get_cookies(url).get('DedeUserID'), 'value', 'watchlater')
|
||||
watchlater_info = self._download_json(
|
||||
'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id)
|
||||
if watchlater_info['code'] == -101:
|
||||
self.raise_login_required(msg='You need to login to access your watchlater list')
|
||||
entries = self._get_entries(watchlater_info, ('data', 'list'))
|
||||
return self.playlist_result(entries, id=list_id, title='稍后再看')
|
||||
|
||||
|
||||
class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
|
||||
'info_dict': {
|
||||
'id': '5_547718',
|
||||
'title': '直播回放',
|
||||
'uploader': '靡烟miya',
|
||||
'uploader_id': '1958703906',
|
||||
'timestamp': 1637985853,
|
||||
'upload_date': '20211127',
|
||||
},
|
||||
'playlist_mincount': 513,
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
|
||||
'info_dict': {
|
||||
'id': '5_547718',
|
||||
},
|
||||
'playlist_mincount': 513,
|
||||
'skip': 'redirect url',
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/list/ml1103407912',
|
||||
'info_dict': {
|
||||
'id': '3_1103407912',
|
||||
'title': '【V2】(旧)',
|
||||
'uploader': '晓月春日',
|
||||
'uploader_id': '84912',
|
||||
'timestamp': 1604905176,
|
||||
'upload_date': '20201109',
|
||||
'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
|
||||
},
|
||||
'playlist_mincount': 22,
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
|
||||
'info_dict': {
|
||||
'id': '3_1103407912',
|
||||
},
|
||||
'playlist_mincount': 22,
|
||||
'skip': 'redirect url',
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/list/watchlater',
|
||||
'info_dict': {'id': 'watchlater'},
|
||||
'playlist_mincount': 0,
|
||||
'skip': 'login required',
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/medialist/play/watchlater',
|
||||
'info_dict': {'id': 'watchlater'},
|
||||
'playlist_mincount': 0,
|
||||
'skip': 'login required',
|
||||
}]
|
||||
|
||||
def _extract_medialist(self, query, list_id):
|
||||
for page_num in itertools.count(1):
|
||||
page_data = self._download_json(
|
||||
'https://api.bilibili.com/x/v2/medialist/resource/list',
|
||||
list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}'
|
||||
)['data']
|
||||
yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
|
||||
query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
|
||||
if not page_data.get('has_more', False):
|
||||
break
|
||||
|
||||
def _real_extract(self, url):
|
||||
list_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, list_id)
|
||||
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)
|
||||
if traverse_obj(initial_state, ('error', 'code', {int_or_none})) != 200:
|
||||
error_code = traverse_obj(initial_state, ('error', 'trueCode', {int_or_none}))
|
||||
error_message = traverse_obj(initial_state, ('error', 'message', {str_or_none}))
|
||||
if error_code == -400 and list_id == 'watchlater':
|
||||
self.raise_login_required('You need to login to access your watchlater playlist')
|
||||
elif error_code == -403:
|
||||
self.raise_login_required('This is a private playlist. You need to login as its owner')
|
||||
elif error_code == 11010:
|
||||
raise ExtractorError('Playlist is no longer available', expected=True)
|
||||
raise ExtractorError(f'Could not access playlist: {error_code} {error_message}')
|
||||
|
||||
query = {
|
||||
'ps': 20,
|
||||
'with_current': False,
|
||||
**traverse_obj(initial_state, {
|
||||
'type': ('playlist', 'type', {int_or_none}),
|
||||
'biz_id': ('playlist', 'id', {int_or_none}),
|
||||
'tid': ('tid', {int_or_none}),
|
||||
'sort_field': ('sortFiled', {int_or_none}),
|
||||
'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
|
||||
})
|
||||
}
|
||||
metadata = {
|
||||
'id': f'{query["type"]}_{query["biz_id"]}',
|
||||
**traverse_obj(initial_state, ('mediaListInfo', {
|
||||
'title': ('title', {str}),
|
||||
'uploader': ('upper', 'name', {str}),
|
||||
'uploader_id': ('upper', 'mid', {str_or_none}),
|
||||
'timestamp': ('ctime', {int_or_none}),
|
||||
'thumbnail': ('cover', {url_or_none}),
|
||||
})),
|
||||
}
|
||||
return self.playlist_result(self._extract_medialist(query, list_id), **metadata)
|
||||
|
||||
|
||||
class BilibiliCategoryIE(InfoExtractor):
|
||||
IE_NAME = 'Bilibili category extractor'
|
||||
_MAX_RESULTS = 1000000
|
||||
_VALID_URL = r'https?://www\.bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/v/kichiku/mad',
|
||||
'info_dict': {
|
||||
@@ -1360,7 +1660,7 @@ class BiliIntlSeriesIE(BiliIntlBaseIE):
|
||||
|
||||
|
||||
class BiliLiveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://live.bilibili.com/(?:blanc/)?(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://live.bilibili.com/196',
|
||||
|
||||
@@ -2,9 +2,9 @@ import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
OnDemandPagedList,
|
||||
clean_html,
|
||||
get_element_by_class,
|
||||
|
||||
@@ -1,56 +1,170 @@
|
||||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_element_text_and_html_by_tag,
|
||||
get_elements_by_class,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
determine_ext,
|
||||
mimetype2ext,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
variadic,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
def html_get_element(tag=None, cls=None):
|
||||
assert tag or cls, 'One of tag or class is required'
|
||||
|
||||
if cls:
|
||||
func = functools.partial(get_elements_by_class, cls, tag=tag)
|
||||
else:
|
||||
func = functools.partial(get_element_text_and_html_by_tag, tag)
|
||||
|
||||
def html_get_element_wrapper(html):
|
||||
return variadic(func(html))[0]
|
||||
|
||||
return html_get_element_wrapper
|
||||
|
||||
|
||||
class BpbIE(InfoExtractor):
|
||||
IE_DESC = 'Bundeszentrale für politische Bildung'
|
||||
_VALID_URL = r'https?://(?:www\.)?bpb\.de/mediathek/(?P<id>[0-9]+)/'
|
||||
_VALID_URL = r'https?://(?:www\.|m\.)?bpb\.de/(?:[^/?#]+/)*(?P<id>\d+)(?:[/?#]|$)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
|
||||
'md5': 'c4f84c8a8044ca9ff68bb8441d300b3f',
|
||||
'info_dict': {
|
||||
'id': '297',
|
||||
'ext': 'mp4',
|
||||
'creator': 'Kooperative Berlin',
|
||||
'description': 'md5:f4f75885ba009d3e2b156247a8941ce6',
|
||||
'release_date': '20160115',
|
||||
'series': 'Interview auf dem Geschichtsforum 1989 | 2009',
|
||||
'tags': ['Friedliche Revolution', 'Erinnerungskultur', 'Vergangenheitspolitik', 'DDR 1949 - 1990', 'Freiheitsrecht', 'BStU', 'Deutschland'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/7/297_teaser_16x9_1240.jpg?8839D',
|
||||
'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR',
|
||||
'description': 'Joachim Gauck, erster Beauftragter für die Stasi-Unterlagen, spricht auf dem Geschichtsforum über die friedliche Revolution 1989 und eine "gewisse Traurigkeit" im Umgang mit der DDR-Vergangenheit.'
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bpb.de/mediathek/video/522184/krieg-flucht-und-falschmeldungen-wirstattdesinformation-2/',
|
||||
'info_dict': {
|
||||
'id': '522184',
|
||||
'ext': 'mp4',
|
||||
'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)',
|
||||
'description': 'md5:f83c795ff8f825a69456a9e51fc15903',
|
||||
'release_date': '20230621',
|
||||
'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/4/522184_teaser_16x9_1240.png?EABFB',
|
||||
'title': 'md5:9b01ccdbf58dbf9e5c9f6e771a803b1c',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bpb.de/lernen/bewegtbild-und-politische-bildung/webvideo/518789/krieg-flucht-und-falschmeldungen-wirstattdesinformation-1/',
|
||||
'info_dict': {
|
||||
'id': '518789',
|
||||
'ext': 'mp4',
|
||||
'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)',
|
||||
'description': 'md5:85228aed433e84ff0ff9bc582abd4ea8',
|
||||
'release_date': '20230302',
|
||||
'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/9/518789_teaser_16x9_1240.jpeg?56D0D',
|
||||
'title': 'md5:3e956f264bb501f6383f10495a401da4',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bpb.de/mediathek/podcasts/apuz-podcast/539727/apuz-20-china/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.bpb.de/mediathek/audio/315813/folge-1-eine-einfuehrung/',
|
||||
'info_dict': {
|
||||
'id': '315813',
|
||||
'ext': 'mp3',
|
||||
'creator': 'Axel Schröder',
|
||||
'description': 'md5:eda9d1af34e5912efef5baf54fba4427',
|
||||
'release_date': '20200921',
|
||||
'series': 'Auf Endlagersuche. Der deutsche Weg zu einem sicheren Atommülllager',
|
||||
'tags': ['Atomenergie', 'Endlager', 'hoch-radioaktiver Abfall', 'Endlagersuche', 'Atommüll', 'Atomendlager', 'Gorleben', 'Deutschland'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/3/315813_teaser_16x9_1240.png?92A94',
|
||||
'title': 'Folge 1: Eine Einführung',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bpb.de/517806/die-weltanschauung-der-neuen-rechten/',
|
||||
'info_dict': {
|
||||
'id': '517806',
|
||||
'ext': 'mp3',
|
||||
'creator': 'Bundeszentrale für politische Bildung',
|
||||
'description': 'md5:594689600e919912aade0b2871cc3fed',
|
||||
'release_date': '20230127',
|
||||
'series': 'Vorträge des Fachtags "Modernisierer. Grenzgänger. Anstifter. Sechs Jahrzehnte \'Neue Rechte\'"',
|
||||
'tags': ['Rechtsextremismus', 'Konservatismus', 'Konservativismus', 'neue Rechte', 'Rechtspopulismus', 'Schnellroda', 'Deutschland'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/6/517806_teaser_16x9_1240.png?7A7A0',
|
||||
'title': 'Die Weltanschauung der "Neuen Rechten"',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bpb.de/mediathek/reihen/zahlen-und-fakten-soziale-situation-filme/520153/zahlen-und-fakten-die-soziale-situation-in-deutschland-migration/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_TITLE_RE = re.compile('(?P<title>[^<]*)<[^>]+>(?P<series>[^<]*)')
|
||||
|
||||
def _parse_vue_attributes(self, name, string, video_id):
|
||||
attributes = extract_attributes(self._search_regex(rf'(<{name}(?:"[^"]*?"|[^>])*>)', string, name))
|
||||
|
||||
for key, value in attributes.items():
|
||||
if key.startswith(':'):
|
||||
attributes[key] = self._parse_json(value, video_id, transform_source=js_to_json, fatal=False)
|
||||
|
||||
return attributes
|
||||
|
||||
@staticmethod
|
||||
def _process_source(source):
|
||||
url = url_or_none(source['src'])
|
||||
if not url:
|
||||
return None
|
||||
|
||||
source_type = source.get('type', '')
|
||||
extension = mimetype2ext(source_type)
|
||||
is_video = source_type.startswith('video')
|
||||
note = url.rpartition('.')[0].rpartition('_')[2] if is_video else None
|
||||
|
||||
return {
|
||||
'url': url,
|
||||
'ext': extension,
|
||||
'vcodec': None if is_video else 'none',
|
||||
'quality': 10 if note == 'high' else 0,
|
||||
'format_note': note,
|
||||
'format_id': join_nonempty(extension, note),
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h2 class="white">(.*?)</h2>', webpage, 'title')
|
||||
video_info_dicts = re.findall(
|
||||
r"({\s*src\s*:\s*'https?://film\.bpb\.de/[^}]+})", webpage)
|
||||
|
||||
formats = []
|
||||
for video_info in video_info_dicts:
|
||||
video_info = self._parse_json(
|
||||
video_info, video_id, transform_source=js_to_json, fatal=False)
|
||||
if not video_info:
|
||||
continue
|
||||
video_url = video_info.get('src')
|
||||
if not video_url:
|
||||
continue
|
||||
quality = 'high' if '_high' in video_url else 'low'
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'quality': 10 if quality == 'high' else 0,
|
||||
'format_note': quality,
|
||||
'format_id': '%s-%s' % (quality, determine_ext(video_url)),
|
||||
})
|
||||
title_result = traverse_obj(webpage, ({html_get_element(cls='opening-header__title')}, {self._TITLE_RE.match}))
|
||||
json_lds = list(self._yield_json_ld(webpage, video_id, fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
'title': traverse_obj(title_result, ('title', {str.strip})) or None,
|
||||
# This metadata could be interpreted otherwise, but it fits "series" the most
|
||||
'series': traverse_obj(title_result, ('series', {str.strip})) or None,
|
||||
'description': join_nonempty(*traverse_obj(webpage, [(
|
||||
{html_get_element(cls='opening-intro')},
|
||||
[{html_get_element(tag='bpb-accordion-item')}, {html_get_element(cls='text-content')}],
|
||||
), {clean_html}]), delim='\n\n') or None,
|
||||
'creator': self._html_search_meta('author', webpage),
|
||||
'uploader': self._html_search_meta('publisher', webpage),
|
||||
'release_date': unified_strdate(self._html_search_meta('date', webpage)),
|
||||
'tags': traverse_obj(json_lds, (..., 'keywords', {lambda x: x.split(',')}, ...)),
|
||||
**traverse_obj(self._parse_vue_attributes('bpb-player', webpage, video_id), {
|
||||
'formats': (':sources', ..., {self._process_source}),
|
||||
'thumbnail': ('poster', {lambda x: urljoin(url, x)}),
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from .adobepass import AdobePassIE
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
get_element_html_by_class,
|
||||
@@ -155,7 +155,7 @@ class BravoTVIE(AdobePassIE):
|
||||
chapters = None
|
||||
|
||||
m3u8_url = self._request_webpage(HEADRequest(
|
||||
update_url_query(f'{tp_url}/stream.m3u8', query)), video_id, 'Checking m3u8 URL').geturl()
|
||||
update_url_query(f'{tp_url}/stream.m3u8', query)), video_id, 'Checking m3u8 URL').url
|
||||
if 'mpeg_cenc' in m3u8_url:
|
||||
self.report_drm(video_id)
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')
|
||||
|
||||
@@ -7,10 +7,10 @@ from .adobepass import AdobePassIE
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_HTTPError,
|
||||
compat_parse_qs,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
dict_get,
|
||||
@@ -915,8 +915,8 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
|
||||
json_data = self._download_json(api_url, video_id, headers=headers)
|
||||
break
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
|
||||
json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403):
|
||||
json_data = self._parse_json(e.cause.response.read().decode(), video_id)[0]
|
||||
message = json_data.get('message') or json_data['error_code']
|
||||
if json_data.get('error_subcode') == 'CLIENT_GEO':
|
||||
self.raise_geo_restricted(msg=message)
|
||||
|
||||
127
yt_dlp/extractor/brilliantpala.py
Normal file
127
yt_dlp/extractor/brilliantpala.py
Normal file
@@ -0,0 +1,127 @@
|
||||
import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
traverse_obj,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class BrilliantpalaBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'brilliantpala'
|
||||
_DOMAIN = '{subdomain}.brilliantpala.org'
|
||||
|
||||
def _initialize_pre_login(self):
|
||||
self._HOMEPAGE = f'https://{self._DOMAIN}'
|
||||
self._LOGIN_API = f'{self._HOMEPAGE}/login/'
|
||||
self._LOGOUT_DEVICES_API = f'{self._HOMEPAGE}/logout_devices/?next=/'
|
||||
self._CONTENT_API = f'{self._HOMEPAGE}/api/v2.4/contents/{{content_id}}/'
|
||||
self._HLS_AES_URI = f'{self._HOMEPAGE}/api/v2.5/video_contents/{{content_id}}/key/'
|
||||
|
||||
def _get_logged_in_username(self, url, video_id):
|
||||
webpage, urlh = self._download_webpage_handle(url, video_id)
|
||||
if self._LOGIN_API == urlh.url:
|
||||
self.raise_login_required()
|
||||
return self._html_search_regex(
|
||||
r'"username"\s*:\s*"(?P<username>[^"]+)"', webpage, 'stream page info', 'username')
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
login_form = self._hidden_inputs(self._download_webpage(
|
||||
self._LOGIN_API, None, 'Downloading login page'))
|
||||
login_form.update({
|
||||
'username': username,
|
||||
'password': password,
|
||||
})
|
||||
self._set_cookie(self._DOMAIN, 'csrftoken', login_form['csrfmiddlewaretoken'])
|
||||
|
||||
logged_page = self._download_webpage(
|
||||
self._LOGIN_API, None, note='Logging in', headers={'Referer': self._LOGIN_API},
|
||||
data=urlencode_postdata(login_form))
|
||||
|
||||
if self._html_search_regex(
|
||||
r'(Your username / email and password)', logged_page, 'auth fail', default=None):
|
||||
raise ExtractorError('wrong username or password', expected=True)
|
||||
|
||||
# the maximum number of logins is one
|
||||
if self._html_search_regex(
|
||||
r'(Logout Other Devices)', logged_page, 'logout devices button', default=None):
|
||||
logout_device_form = self._hidden_inputs(logged_page)
|
||||
self._download_webpage(
|
||||
self._LOGOUT_DEVICES_API, None, headers={'Referer': self._LOGIN_API},
|
||||
note='Logging out other devices', data=urlencode_postdata(logout_device_form))
|
||||
|
||||
def _real_extract(self, url):
|
||||
course_id, content_id = self._match_valid_url(url).group('course_id', 'content_id')
|
||||
video_id = f'{course_id}-{content_id}'
|
||||
|
||||
username = self._get_logged_in_username(url, video_id)
|
||||
|
||||
content_json = self._download_json(
|
||||
self._CONTENT_API.format(content_id=content_id), video_id,
|
||||
note='Fetching content info', errnote='Unable to fetch content info')
|
||||
|
||||
entries = []
|
||||
for stream in traverse_obj(content_json, ('video', 'streams', lambda _, v: v['id'] and v['url'])):
|
||||
formats = self._extract_m3u8_formats(stream['url'], video_id, fatal=False)
|
||||
if not formats:
|
||||
continue
|
||||
entries.append({
|
||||
'id': str(stream['id']),
|
||||
'title': content_json.get('title'),
|
||||
'formats': formats,
|
||||
'hls_aes': {'uri': self._HLS_AES_URI.format(content_id=content_id)},
|
||||
'http_headers': {'X-Key': hashlib.sha256(username.encode('ascii')).hexdigest()},
|
||||
'thumbnail': content_json.get('cover_image'),
|
||||
})
|
||||
|
||||
return self.playlist_result(
|
||||
entries, playlist_id=video_id, playlist_title=content_json.get('title'))
|
||||
|
||||
|
||||
class BrilliantpalaElearnIE(BrilliantpalaBaseIE):
|
||||
IE_NAME = 'Brilliantpala:Elearn'
|
||||
IE_DESC = 'VoD on elearn.brilliantpala.org'
|
||||
_VALID_URL = r'https?://elearn\.brilliantpala\.org/courses/(?P<course_id>\d+)/contents/(?P<content_id>\d+)/?'
|
||||
_TESTS = [{
|
||||
'url': 'https://elearn.brilliantpala.org/courses/42/contents/12345/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://elearn.brilliantpala.org/courses/98/contents/36683/',
|
||||
'info_dict': {
|
||||
'id': '23577',
|
||||
'ext': 'mp4',
|
||||
'title': 'Physical World, Units and Measurements - 1',
|
||||
'thumbnail': 'https://d1j3vi2u94ebt0.cloudfront.net/institute/brilliantpalalms/chapter_contents/26237/e657f81b90874be19795c7ea081f8d5c.png',
|
||||
'live_status': 'not_live',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
_DOMAIN = BrilliantpalaBaseIE._DOMAIN.format(subdomain='elearn')
|
||||
|
||||
|
||||
class BrilliantpalaClassesIE(BrilliantpalaBaseIE):
|
||||
IE_NAME = 'Brilliantpala:Classes'
|
||||
IE_DESC = 'VoD on classes.brilliantpala.org'
|
||||
_VALID_URL = r'https?://classes\.brilliantpala\.org/courses/(?P<course_id>\d+)/contents/(?P<content_id>\d+)/?'
|
||||
_TESTS = [{
|
||||
'url': 'https://classes.brilliantpala.org/courses/42/contents/12345/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://classes.brilliantpala.org/courses/416/contents/25445/',
|
||||
'info_dict': {
|
||||
'id': '9128',
|
||||
'ext': 'mp4',
|
||||
'title': 'Motion in a Straight Line - Class 1',
|
||||
'thumbnail': 'https://d3e4y8hquds3ek.cloudfront.net/institute/brilliantpalaelearn/chapter_contents/ff5ba838d0ec43419f67387fe1a01fa8.png',
|
||||
'live_status': 'not_live',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
_DOMAIN = BrilliantpalaBaseIE._DOMAIN.format(subdomain='classes')
|
||||
39
yt_dlp/extractor/canal1.py
Normal file
39
yt_dlp/extractor/canal1.py
Normal file
@@ -0,0 +1,39 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class Canal1IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.|noticias\.)?canal1\.com\.co/(?:[^?#&])+/(?P<id>[\w-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://canal1.com.co/noticias/napa-i-una-cadena-de-produccion-de-arroz-que-se-quedo-en-veremos-y-abandonada-en-el-departamento-del-choco/',
|
||||
'info_dict': {
|
||||
'id': '63b39f6b354977084b85ab54',
|
||||
'display_id': 'napa-i-una-cadena-de-produccion-de-arroz-que-se-quedo-en-veremos-y-abandonada-en-el-departamento-del-choco',
|
||||
'title': 'Ñapa I Una cadena de producción de arroz que se quedó en veremos y abandonada en el departamento del Chocó',
|
||||
'description': 'md5:bc49c6d64d20610ea1e7daf079a0d013',
|
||||
'thumbnail': r're:^https?://[^?#]+63b39f6b354977084b85ab54',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://noticias.canal1.com.co/noticias/tres-i-el-triste-record-que-impuso-elon-musk-el-dueno-de-tesla-y-de-twitter/',
|
||||
'info_dict': {
|
||||
'id': '63b39e93f5fd223aa32250fb',
|
||||
'display_id': 'tres-i-el-triste-record-que-impuso-elon-musk-el-dueno-de-tesla-y-de-twitter',
|
||||
'title': 'Tres I El triste récord que impuso Elon Musk, el dueño de Tesla y de Twitter',
|
||||
'description': 'md5:d9f691f131a21ce6767ca6c05d17d791',
|
||||
'thumbnail': r're:^https?://[^?#]+63b39e93f5fd223aa32250fb',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
}, {
|
||||
# Geo-restricted to Colombia
|
||||
'url': 'https://canal1.com.co/programas/guerreros-canal-1/video-inedito-guerreros-despedida-kewin-zarate/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
return self.url_result(
|
||||
self._search_regex(r'"embedUrl"\s*:\s*"([^"]+)', webpage, 'embed url'),
|
||||
display_id=display_id, url_transparent=True)
|
||||
@@ -64,7 +64,7 @@ class CanalplusIE(InfoExtractor):
|
||||
# response = self._request_webpage(
|
||||
# HEADRequest(fmt_url), video_id,
|
||||
# 'Checking if the video is georestricted')
|
||||
# if '/blocage' in response.geturl():
|
||||
# if '/blocage' in response.url:
|
||||
# raise ExtractorError(
|
||||
# 'The video is not available in your country',
|
||||
# expected=True)
|
||||
|
||||
136
yt_dlp/extractor/caracoltv.py
Normal file
136
yt_dlp/extractor/caracoltv.py
Normal file
@@ -0,0 +1,136 @@
|
||||
import base64
|
||||
import json
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
traverse_obj,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class CaracolTvPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://play\.caracoltv\.com/videoDetails/(?P<id>[^/?#]+)'
|
||||
_NETRC_MACHINE = 'caracoltv-play'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://play.caracoltv.com/videoDetails/OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==',
|
||||
'info_dict': {
|
||||
'id': 'OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==',
|
||||
'title': 'La teoría del promedio',
|
||||
'description': 'md5:1cdd6d2c13f19ef0d9649ab81a023ac3',
|
||||
},
|
||||
'playlist_count': 6,
|
||||
}, {
|
||||
'url': 'https://play.caracoltv.com/videoDetails/OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==/ella?season=0',
|
||||
'info_dict': {
|
||||
'id': 'OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==',
|
||||
'title': 'Ella',
|
||||
'description': 'md5:a639b1feb5ddcc0cff92a489b4e544b8',
|
||||
},
|
||||
'playlist_count': 10,
|
||||
}, {
|
||||
'url': 'https://play.caracoltv.com/videoDetails/OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==/la-vuelta-al-mundo-en-80-risas-2022?season=0',
|
||||
'info_dict': {
|
||||
'id': 'OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==',
|
||||
'title': 'La vuelta al mundo en 80 risas 2022',
|
||||
'description': 'md5:e97aac36106e5c37ebf947b3350106a4',
|
||||
},
|
||||
'playlist_count': 17,
|
||||
}, {
|
||||
'url': 'https://play.caracoltv.com/videoDetails/MzoxX3BwbjRmNjB1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_USER_TOKEN = None
|
||||
|
||||
def _extract_app_token(self, webpage):
|
||||
config_js_path = self._search_regex(
|
||||
r'<script[^>]+src\s*=\s*"([^"]+coreConfig.js[^"]+)', webpage, 'config js url', fatal=False)
|
||||
|
||||
mediation_config = {} if not config_js_path else self._search_json(
|
||||
r'mediation\s*:', self._download_webpage(
|
||||
urljoin('https://play.caracoltv.com/', config_js_path), None, fatal=False, note='Extracting JS config'),
|
||||
'mediation_config', None, transform_source=js_to_json, fatal=False)
|
||||
|
||||
key = traverse_obj(
|
||||
mediation_config, ('live', 'key')) or '795cd9c089a1fc48094524a5eba85a3fca1331817c802f601735907c8bbb4f50'
|
||||
secret = traverse_obj(
|
||||
mediation_config, ('live', 'secret')) or '64dec00a6989ba83d087621465b5e5d38bdac22033b0613b659c442c78976fa0'
|
||||
|
||||
return base64.b64encode(f'{key}:{secret}'.encode()).decode()
|
||||
|
||||
def _perform_login(self, email, password):
|
||||
webpage = self._download_webpage('https://play.caracoltv.com/', None, fatal=False)
|
||||
app_token = self._extract_app_token(webpage)
|
||||
|
||||
bearer_token = self._download_json(
|
||||
'https://eu-gateway.inmobly.com/applications/oauth', None, data=b'', note='Retrieving bearer token',
|
||||
headers={'Authorization': f'Basic {app_token}'})['token']
|
||||
|
||||
self._USER_TOKEN = self._download_json(
|
||||
'https://eu-gateway.inmobly.com/user/login', None, note='Performing login', headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': f'Bearer {bearer_token}',
|
||||
}, data=json.dumps({
|
||||
'device_data': {
|
||||
'device_id': str(uuid.uuid4()),
|
||||
'device_token': '',
|
||||
'device_type': 'web'
|
||||
},
|
||||
'login_data': {
|
||||
'enabled': True,
|
||||
'email': email,
|
||||
'password': password,
|
||||
}
|
||||
}).encode())['user_token']
|
||||
|
||||
def _extract_video(self, video_data, series_id=None, season_id=None, season_number=None):
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_data['stream_url'], series_id, 'mp4')
|
||||
|
||||
return {
|
||||
'id': video_data['id'],
|
||||
'title': video_data.get('name'),
|
||||
'description': video_data.get('description'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'thumbnails': traverse_obj(
|
||||
video_data, ('extra_thumbs', ..., {'url': 'thumb_url', 'height': 'height', 'width': 'width'})),
|
||||
'series_id': series_id,
|
||||
'season_id': season_id,
|
||||
'season_number': int_or_none(season_number),
|
||||
'episode_number': int_or_none(video_data.get('item_order')),
|
||||
'is_live': video_data.get('entry_type') == 3,
|
||||
}
|
||||
|
||||
def _extract_series_seasons(self, seasons, series_id):
|
||||
for season in seasons:
|
||||
api_response = self._download_json(
|
||||
'https://eu-gateway.inmobly.com/feed', series_id, query={'season_id': season['id']},
|
||||
headers={'Authorization': f'Bearer {self._USER_TOKEN}'})
|
||||
|
||||
season_number = season.get('order')
|
||||
for episode in api_response['items']:
|
||||
yield self._extract_video(episode, series_id, season['id'], season_number)
|
||||
|
||||
def _real_extract(self, url):
|
||||
series_id = self._match_id(url)
|
||||
|
||||
if self._USER_TOKEN is None:
|
||||
self._perform_login('guest@inmobly.com', 'Test@gus1')
|
||||
|
||||
api_response = self._download_json(
|
||||
'https://eu-gateway.inmobly.com/feed', series_id, query={'include_ids': series_id},
|
||||
headers={'Authorization': f'Bearer {self._USER_TOKEN}'})['items'][0]
|
||||
|
||||
if not api_response.get('seasons'):
|
||||
return self._extract_video(api_response)
|
||||
|
||||
return self.playlist_result(
|
||||
self._extract_series_seasons(api_response['seasons'], series_id),
|
||||
series_id, **traverse_obj(api_response, {
|
||||
'title': 'name',
|
||||
'description': 'description',
|
||||
}))
|
||||
@@ -2,6 +2,7 @@ import re
|
||||
import json
|
||||
import base64
|
||||
import time
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
@@ -65,6 +66,7 @@ class CBCIE(InfoExtractor):
|
||||
'uploader': 'CBCC-NEW',
|
||||
'timestamp': 255977160,
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
# multiple iframes
|
||||
'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot',
|
||||
@@ -96,7 +98,7 @@ class CBCIE(InfoExtractor):
|
||||
# multiple CBC.APP.Caffeine.initInstance(...)
|
||||
'url': 'http://www.cbc.ca/news/canada/calgary/dog-indoor-exercise-winter-1.3928238',
|
||||
'info_dict': {
|
||||
'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks',
|
||||
'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks', # FIXME
|
||||
'id': 'dog-indoor-exercise-winter-1.3928238',
|
||||
'description': 'md5:c18552e41726ee95bd75210d1ca9194c',
|
||||
},
|
||||
@@ -161,7 +163,7 @@ class CBCPlayerIE(InfoExtractor):
|
||||
'upload_date': '20160210',
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
'skip': 'Geo-restricted to Canada',
|
||||
'skip': 'Geo-restricted to Canada and no longer available',
|
||||
}, {
|
||||
# Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
|
||||
'url': 'http://www.cbc.ca/player/play/2657631896',
|
||||
@@ -174,6 +176,9 @@ class CBCPlayerIE(InfoExtractor):
|
||||
'timestamp': 1425704400,
|
||||
'upload_date': '20150307',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg',
|
||||
'chapters': [],
|
||||
'duration': 494.811,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.cbc.ca/player/play/2164402062',
|
||||
@@ -186,6 +191,28 @@ class CBCPlayerIE(InfoExtractor):
|
||||
'timestamp': 1320410746,
|
||||
'upload_date': '20111104',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg',
|
||||
'chapters': [],
|
||||
'duration': 186.867,
|
||||
},
|
||||
}, {
|
||||
# Has subtitles
|
||||
# These broadcasts expire after ~1 month, can find new test URL here:
|
||||
# https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast
|
||||
'url': 'http://www.cbc.ca/player/play/2249992771553',
|
||||
'md5': '2f2fb675dd4f0f8a5bb7588d1b13bacd',
|
||||
'info_dict': {
|
||||
'id': '2249992771553',
|
||||
'ext': 'mp4',
|
||||
'title': 'The National | Women’s soccer pay, Florida seawater, Swift quake',
|
||||
'description': 'md5:adba28011a56cfa47a080ff198dad27a',
|
||||
'timestamp': 1690596000,
|
||||
'duration': 2716.333,
|
||||
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
|
||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/481/326/thumbnail.jpeg',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'chapters': 'count:5',
|
||||
'upload_date': '20230729',
|
||||
},
|
||||
}]
|
||||
|
||||
@@ -199,9 +226,42 @@ class CBCPlayerIE(InfoExtractor):
|
||||
'force_smil_url': True
|
||||
}),
|
||||
'id': video_id,
|
||||
'_format_sort_fields': ('res', 'proto') # Prioritize direct http formats over HLS
|
||||
}
|
||||
|
||||
|
||||
class CBCPlayerPlaylistIE(InfoExtractor):
|
||||
IE_NAME = 'cbc.ca:player:playlist'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?:player/)(?!play/)(?P<id>[^?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast',
|
||||
'playlist_mincount': 25,
|
||||
'info_dict': {
|
||||
'id': 'news/tv shows/the national/latest broadcast',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.cbc.ca/player/news/Canada/North',
|
||||
'playlist_mincount': 25,
|
||||
'info_dict': {
|
||||
'id': 'news/canada/north',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = urllib.parse.unquote(self._match_id(url)).lower()
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
json_content = self._search_json(
|
||||
r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', playlist_id)
|
||||
|
||||
def entries():
|
||||
for video_id in traverse_obj(json_content, (
|
||||
'video', 'clipsByCategory', lambda k, _: k.lower() == playlist_id, 'items', ..., 'id'
|
||||
)):
|
||||
yield self.url_result(f'https://www.cbc.ca/player/play/{video_id}', CBCPlayerIE)
|
||||
|
||||
return self.playlist_result(entries(), playlist_id)
|
||||
|
||||
|
||||
class CBCGemIE(InfoExtractor):
|
||||
IE_NAME = 'gem.cbc.ca'
|
||||
_VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>[0-9a-z-]+/s[0-9]+[a-z][0-9]+)'
|
||||
@@ -280,12 +340,12 @@ class CBCGemIE(InfoExtractor):
|
||||
data = json.dumps({'jwt': sig}).encode()
|
||||
headers = {'content-type': 'application/json', 'ott-device-type': 'web'}
|
||||
resp = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/token',
|
||||
None, data=data, headers=headers)
|
||||
None, data=data, headers=headers, expected_status=426)
|
||||
cbc_access_token = resp['accessToken']
|
||||
|
||||
headers = {'content-type': 'application/json', 'ott-device-type': 'web', 'ott-access-token': cbc_access_token}
|
||||
resp = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/profile',
|
||||
None, headers=headers)
|
||||
None, headers=headers, expected_status=426)
|
||||
return resp['claimsToken']
|
||||
|
||||
def _get_claims_token_expiry(self):
|
||||
@@ -417,6 +477,10 @@ class CBCGemPlaylistIE(InfoExtractor):
|
||||
'id': 'schitts-creek/s06',
|
||||
'title': 'Season 6',
|
||||
'description': 'md5:6a92104a56cbeb5818cc47884d4326a2',
|
||||
'series': 'Schitt\'s Creek',
|
||||
'season_number': 6,
|
||||
'season': 'Season 6',
|
||||
'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/season/perso/cbc_schitts_creek_season_06_carousel_v03.jpg?impolicy=ott&im=Resize=(_Size_)&quality=75',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://gem.cbc.ca/schitts-creek/s06',
|
||||
|
||||
@@ -101,6 +101,7 @@ class CBSIE(CBSBaseIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Subscription required',
|
||||
}, {
|
||||
'url': 'https://www.cbs.com/shows/video/sZH1MGgomIosZgxGJ1l263MFq16oMtW1/',
|
||||
'info_dict': {
|
||||
@@ -117,6 +118,7 @@ class CBSIE(CBSBaseIE):
|
||||
},
|
||||
'expected_warnings': [
|
||||
'This content expired on', 'No video formats found', 'Requested format is not available'],
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/',
|
||||
'only_matching': True,
|
||||
|
||||
@@ -7,9 +7,9 @@ import zlib
|
||||
from .anvato import AnvatoIE
|
||||
from .common import InfoExtractor
|
||||
from .paramountplus import ParamountPlusIE
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
UserNotLive,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
|
||||
@@ -90,10 +90,17 @@ class CCCPlaylistIE(InfoExtractor):
|
||||
'id': '30c3',
|
||||
},
|
||||
'playlist_count': 135,
|
||||
}, {
|
||||
'url': 'https://media.ccc.de/c/DS2023',
|
||||
'info_dict': {
|
||||
'title': 'Datenspuren 2023',
|
||||
'id': 'DS2023',
|
||||
},
|
||||
'playlist_count': 37
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url).lower()
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
conf = self._download_json(
|
||||
'https://media.ccc.de/public/conferences/' + playlist_id,
|
||||
|
||||
@@ -1,20 +1,20 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..compat import compat_urllib_parse_unquote, compat_urllib_parse_urlparse
|
||||
from ..networking import Request
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
sanitized_Request,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
urlencode_postdata,
|
||||
USER_AGENTS,
|
||||
)
|
||||
|
||||
USER_AGENTS = {
|
||||
'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
|
||||
}
|
||||
|
||||
|
||||
class CeskaTelevizeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
|
||||
@@ -97,7 +97,7 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
webpage, urlh = self._download_webpage_handle(url, playlist_id)
|
||||
parsed_url = compat_urllib_parse_urlparse(urlh.geturl())
|
||||
parsed_url = compat_urllib_parse_urlparse(urlh.url)
|
||||
site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize')
|
||||
playlist_title = self._og_search_title(webpage, default=None)
|
||||
if site_name and playlist_title:
|
||||
@@ -163,16 +163,16 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
entries = []
|
||||
|
||||
for user_agent in (None, USER_AGENTS['Safari']):
|
||||
req = sanitized_Request(
|
||||
req = Request(
|
||||
'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/',
|
||||
data=urlencode_postdata(data))
|
||||
|
||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||
req.add_header('x-addr', '127.0.0.1')
|
||||
req.add_header('X-Requested-With', 'XMLHttpRequest')
|
||||
req.headers['Content-type'] = 'application/x-www-form-urlencoded'
|
||||
req.headers['x-addr'] = '127.0.0.1'
|
||||
req.headers['X-Requested-With'] = 'XMLHttpRequest'
|
||||
if user_agent:
|
||||
req.add_header('User-Agent', user_agent)
|
||||
req.add_header('Referer', url)
|
||||
req.headers['User-Agent'] = user_agent
|
||||
req.headers['Referer'] = url
|
||||
|
||||
playlistpage = self._download_json(req, playlist_id, fatal=False)
|
||||
|
||||
@@ -183,8 +183,8 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
if playlist_url == 'error_region':
|
||||
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
|
||||
|
||||
req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
|
||||
req.add_header('Referer', url)
|
||||
req = Request(compat_urllib_parse_unquote(playlist_url))
|
||||
req.headers['Referer'] = url
|
||||
|
||||
playlist = self._download_json(req, playlist_id, fatal=False)
|
||||
if not playlist:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import json
|
||||
import urllib.error
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
@@ -40,7 +40,7 @@ class CinetecaMilanoIE(InfoExtractor):
|
||||
'Authorization': try_get(self._get_cookies('https://www.cinetecamilano.it'), lambda x: f'Bearer {x["cnt-token"].value}') or ''
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if ((isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 500)
|
||||
if ((isinstance(e.cause, HTTPError) and e.cause.status == 500)
|
||||
or isinstance(e.cause, json.JSONDecodeError)):
|
||||
self.raise_login_required(method='cookies')
|
||||
raise
|
||||
|
||||
136
yt_dlp/extractor/cineverse.py
Normal file
136
yt_dlp/extractor/cineverse.py
Normal file
@@ -0,0 +1,136 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
filter_dict,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
smuggle_url,
|
||||
traverse_obj,
|
||||
unsmuggle_url,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class CineverseBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https://www\.(?P<host>%s)' % '|'.join(map(re.escape, (
|
||||
'cineverse.com',
|
||||
'asiancrush.com',
|
||||
'dovechannel.com',
|
||||
'screambox.com',
|
||||
'midnightpulp.com',
|
||||
'fandor.com',
|
||||
'retrocrush.tv',
|
||||
)))
|
||||
|
||||
|
||||
class CineverseIE(CineverseBaseIE):
|
||||
_VALID_URL = rf'{CineverseBaseIE._VALID_URL_BASE}/watch/(?P<id>[A-Z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.asiancrush.com/watch/DMR00018919/Women-Who-Flirt',
|
||||
'skip': 'geo-blocked',
|
||||
'info_dict': {
|
||||
'title': 'Women Who Flirt',
|
||||
'ext': 'mp4',
|
||||
'id': 'DMR00018919',
|
||||
'modified_timestamp': 1678744575289,
|
||||
'cast': ['Xun Zhou', 'Xiaoming Huang', 'Yi-Lin Sie', 'Sonia Sui', 'Quniciren'],
|
||||
'duration': 5811.597,
|
||||
'description': 'md5:892fd62a05611d394141e8394ace0bc6',
|
||||
'age_limit': 13,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.retrocrush.tv/watch/1000000023016/Archenemy! Crystal Bowie',
|
||||
'skip': 'geo-blocked',
|
||||
'info_dict': {
|
||||
'title': 'Archenemy! Crystal Bowie',
|
||||
'ext': 'mp4',
|
||||
'id': '1000000023016',
|
||||
'episode_number': 3,
|
||||
'season_number': 1,
|
||||
'cast': ['Nachi Nozawa', 'Yoshiko Sakakibara', 'Toshiko Fujita'],
|
||||
'age_limit': 0,
|
||||
'episode': 'Episode 3',
|
||||
'season': 'Season 1',
|
||||
'duration': 1485.067,
|
||||
'description': 'Cobra meets a beautiful bounty hunter by the name of Jane Royal.',
|
||||
'series': 'Space Adventure COBRA (Original Japanese)',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, default={})
|
||||
self._initialize_geo_bypass({
|
||||
'countries': smuggled_data.get('geo_countries'),
|
||||
})
|
||||
video_id = self._match_id(url)
|
||||
html = self._download_webpage(url, video_id)
|
||||
idetails = self._search_nextjs_data(html, video_id)['props']['pageProps']['idetails']
|
||||
|
||||
if idetails.get('err_code') == 1200:
|
||||
self.raise_geo_restricted(
|
||||
'This video is not available from your location due to geo restriction. '
|
||||
'You may be able to bypass it by using the /details/ page instead of the /watch/ page',
|
||||
countries=smuggled_data.get('geo_countries'))
|
||||
|
||||
return {
|
||||
'subtitles': filter_dict({
|
||||
'en': traverse_obj(idetails, (('cc_url_vtt', 'subtitle_url'), {'url': {url_or_none}})) or None,
|
||||
}),
|
||||
'formats': self._extract_m3u8_formats(idetails['url'], video_id),
|
||||
**traverse_obj(idetails, {
|
||||
'title': 'title',
|
||||
'id': ('details', 'item_id'),
|
||||
'description': ('details', 'description'),
|
||||
'duration': ('duration', {lambda x: x / 1000}),
|
||||
'cast': ('details', 'cast', {lambda x: x.split(', ')}),
|
||||
'modified_timestamp': ('details', 'updated_by', 0, 'update_time', 'time', {int_or_none}),
|
||||
'season_number': ('details', 'season', {int_or_none}),
|
||||
'episode_number': ('details', 'episode', {int_or_none}),
|
||||
'age_limit': ('details', 'rating_code', {parse_age_limit}),
|
||||
'series': ('details', 'series_details', 'title'),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
class CineverseDetailsIE(CineverseBaseIE):
|
||||
_VALID_URL = rf'{CineverseBaseIE._VALID_URL_BASE}/details/(?P<id>[A-Z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.retrocrush.tv/details/1000000023012/Space-Adventure-COBRA-(Original-Japanese)',
|
||||
'playlist_mincount': 30,
|
||||
'info_dict': {
|
||||
'title': 'Space Adventure COBRA (Original Japanese)',
|
||||
'id': '1000000023012',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.asiancrush.com/details/NNVG4938/Hansel-and-Gretel',
|
||||
'info_dict': {
|
||||
'id': 'NNVG4938',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hansel and Gretel',
|
||||
'description': 'md5:e3e4c35309c2e82aee044f972c2fb05d',
|
||||
'cast': ['Jeong-myeong Cheon', 'Eun Won-jae', 'Shim Eun-gyeong', 'Ji-hee Jin', 'Hee-soon Park', 'Lydia Park', 'Kyeong-ik Kim'],
|
||||
'duration': 7030.732,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
host, series_id = self._match_valid_url(url).group('host', 'id')
|
||||
html = self._download_webpage(url, series_id)
|
||||
pageprops = self._search_nextjs_data(html, series_id)['props']['pageProps']
|
||||
|
||||
geo_countries = traverse_obj(pageprops, ('itemDetailsData', 'geo_country', {lambda x: x.split(', ')}))
|
||||
geoblocked = traverse_obj(pageprops, (
|
||||
'itemDetailsData', 'playback_err_msg')) == 'This title is not available in your location.'
|
||||
|
||||
def item_result(item):
|
||||
item_url = f'https://www.{host}/watch/{item["item_id"]}/{item["title"]}'
|
||||
if geoblocked:
|
||||
item_url = smuggle_url(item_url, {'geo_countries': geo_countries})
|
||||
return self.url_result(item_url, CineverseIE)
|
||||
|
||||
season = traverse_obj(pageprops, ('seasonEpisodes', ..., 'episodes', lambda _, v: v['item_id'] and v['title']))
|
||||
if season:
|
||||
return self.playlist_result([item_result(ep) for ep in season], playlist_id=series_id,
|
||||
playlist_title=traverse_obj(pageprops, ('itemDetailsData', 'title')))
|
||||
return item_result(pageprops['itemDetailsData'])
|
||||
@@ -33,7 +33,7 @@ class CiscoWebexIE(InfoExtractor):
|
||||
if rcid:
|
||||
webpage = self._download_webpage(url, None, note='Getting video ID')
|
||||
url = self._search_regex(self._VALID_URL, webpage, 'redirection url', group='url')
|
||||
url = self._request_webpage(url, None, note='Resolving final URL').geturl()
|
||||
url = self._request_webpage(url, None, note='Resolving final URL').url
|
||||
mobj = self._match_valid_url(url)
|
||||
subdomain = mobj.group('subdomain')
|
||||
siteurl = mobj.group('siteurl_1') or mobj.group('siteurl_2')
|
||||
@@ -49,7 +49,7 @@ class CiscoWebexIE(InfoExtractor):
|
||||
'https://%s.webex.com/webappng/api/v1/recordings/%s/stream' % (subdomain, video_id),
|
||||
video_id, headers=headers, query={'siteurl': siteurl}, expected_status=(403, 429))
|
||||
|
||||
if urlh.getcode() == 403:
|
||||
if urlh.status == 403:
|
||||
if stream['code'] == 53004:
|
||||
self.raise_login_required()
|
||||
if stream['code'] == 53005:
|
||||
@@ -59,7 +59,7 @@ class CiscoWebexIE(InfoExtractor):
|
||||
'This video is protected by a password, use the --video-password option', expected=True)
|
||||
raise ExtractorError(f'{self.IE_NAME} said: {stream["code"]} - {stream["message"]}', expected=True)
|
||||
|
||||
if urlh.getcode() == 429:
|
||||
if urlh.status == 429:
|
||||
self.raise_login_required(
|
||||
f'{self.IE_NAME} asks you to solve a CAPTCHA. Solve CAPTCHA in browser and',
|
||||
method='cookies')
|
||||
|
||||
@@ -19,6 +19,7 @@ class CNBCIE(InfoExtractor):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Dead link',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -49,6 +50,7 @@ class CNBCVideoIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Dead link',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -17,15 +17,26 @@ import subprocess
|
||||
import sys
|
||||
import time
|
||||
import types
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from ..compat import functools # isort: split
|
||||
from ..compat import compat_etree_fromstring, compat_expanduser, compat_os_name
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_expanduser,
|
||||
compat_os_name,
|
||||
urllib_req_to_req,
|
||||
)
|
||||
from ..cookies import LenientSimpleCookie
|
||||
from ..downloader.f4m import get_base_url, remove_encrypted_media
|
||||
from ..downloader.hls import HlsFD
|
||||
from ..networking import HEADRequest, Request
|
||||
from ..networking.exceptions import (
|
||||
HTTPError,
|
||||
IncompleteRead,
|
||||
network_exceptions,
|
||||
)
|
||||
from ..utils import (
|
||||
IDENTITY,
|
||||
JSON_LD_RE,
|
||||
@@ -34,7 +45,6 @@ from ..utils import (
|
||||
FormatSorter,
|
||||
GeoRestrictedError,
|
||||
GeoUtils,
|
||||
HEADRequest,
|
||||
LenientJSONDecoder,
|
||||
Popen,
|
||||
RegexNotFoundError,
|
||||
@@ -60,7 +70,6 @@ from ..utils import (
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
netrc_from_content,
|
||||
network_exceptions,
|
||||
orderedSet,
|
||||
parse_bitrate,
|
||||
parse_codecs,
|
||||
@@ -70,7 +79,6 @@ from ..utils import (
|
||||
parse_resolution,
|
||||
sanitize_filename,
|
||||
sanitize_url,
|
||||
sanitized_Request,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
str_to_int,
|
||||
@@ -82,8 +90,6 @@ from ..utils import (
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
update_Request,
|
||||
update_url_query,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
urlhandle_detect_ext,
|
||||
@@ -224,7 +230,8 @@ class InfoExtractor:
|
||||
width : height ratio as float.
|
||||
* no_resume The server does not support resuming the
|
||||
(HTTP or RTMP) download. Boolean.
|
||||
* has_drm The format has DRM and cannot be downloaded. Boolean
|
||||
* has_drm True if the format has DRM and cannot be downloaded.
|
||||
'maybe' if the format may have DRM and has to be tested before download.
|
||||
* extra_param_to_segment_url A query string to append to each
|
||||
fragment's URL, or to update each existing query string
|
||||
with. Only applied by the native HLS/DASH downloaders.
|
||||
@@ -722,11 +729,11 @@ class InfoExtractor:
|
||||
except UnsupportedError:
|
||||
raise
|
||||
except ExtractorError as e:
|
||||
e.video_id = e.video_id or self.get_temp_id(url),
|
||||
e.video_id = e.video_id or self.get_temp_id(url)
|
||||
e.ie = e.ie or self.IE_NAME,
|
||||
e.traceback = e.traceback or sys.exc_info()[2]
|
||||
raise
|
||||
except http.client.IncompleteRead as e:
|
||||
except IncompleteRead as e:
|
||||
raise ExtractorError('A network error has occurred.', cause=e, expected=True, video_id=self.get_temp_id(url))
|
||||
except (KeyError, StopIteration) as e:
|
||||
raise ExtractorError('An extractor error has occurred.', cause=e, video_id=self.get_temp_id(url))
|
||||
@@ -785,20 +792,25 @@ class InfoExtractor:
|
||||
|
||||
@staticmethod
|
||||
def __can_accept_status_code(err, expected_status):
|
||||
assert isinstance(err, urllib.error.HTTPError)
|
||||
assert isinstance(err, HTTPError)
|
||||
if expected_status is None:
|
||||
return False
|
||||
elif callable(expected_status):
|
||||
return expected_status(err.code) is True
|
||||
return expected_status(err.status) is True
|
||||
else:
|
||||
return err.code in variadic(expected_status)
|
||||
return err.status in variadic(expected_status)
|
||||
|
||||
def _create_request(self, url_or_request, data=None, headers=None, query=None):
|
||||
if isinstance(url_or_request, urllib.request.Request):
|
||||
return update_Request(url_or_request, data=data, headers=headers, query=query)
|
||||
if query:
|
||||
url_or_request = update_url_query(url_or_request, query)
|
||||
return sanitized_Request(url_or_request, data, headers or {})
|
||||
self._downloader.deprecation_warning(
|
||||
'Passing a urllib.request.Request to _create_request() is deprecated. '
|
||||
'Use yt_dlp.networking.common.Request instead.')
|
||||
url_or_request = urllib_req_to_req(url_or_request)
|
||||
elif not isinstance(url_or_request, Request):
|
||||
url_or_request = Request(url_or_request)
|
||||
|
||||
url_or_request.update(data=data, headers=headers, query=query)
|
||||
return url_or_request
|
||||
|
||||
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None, expected_status=None):
|
||||
"""
|
||||
@@ -834,14 +846,9 @@ class InfoExtractor:
|
||||
try:
|
||||
return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query))
|
||||
except network_exceptions as err:
|
||||
if isinstance(err, urllib.error.HTTPError):
|
||||
if isinstance(err, HTTPError):
|
||||
if self.__can_accept_status_code(err, expected_status):
|
||||
# Retain reference to error to prevent file object from
|
||||
# being closed before it can be read. Works around the
|
||||
# effects of <https://bugs.python.org/issue15002>
|
||||
# introduced in Python 3.4.1.
|
||||
err.fp._error = err
|
||||
return err.fp
|
||||
return err.response
|
||||
|
||||
if errnote is False:
|
||||
return False
|
||||
@@ -973,11 +980,11 @@ class InfoExtractor:
|
||||
if prefix is not None:
|
||||
webpage_bytes = prefix + webpage_bytes
|
||||
if self.get_param('dump_intermediate_pages', False):
|
||||
self.to_screen('Dumping request to ' + urlh.geturl())
|
||||
self.to_screen('Dumping request to ' + urlh.url)
|
||||
dump = base64.b64encode(webpage_bytes).decode('ascii')
|
||||
self._downloader.to_screen(dump)
|
||||
if self.get_param('write_pages'):
|
||||
filename = self._request_dump_filename(urlh.geturl(), video_id)
|
||||
filename = self._request_dump_filename(urlh.url, video_id)
|
||||
self.to_screen(f'Saving request to {filename}')
|
||||
with open(filename, 'wb') as outf:
|
||||
outf.write(webpage_bytes)
|
||||
@@ -1035,7 +1042,7 @@ class InfoExtractor:
|
||||
fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
|
||||
if self.get_param('load_pages'):
|
||||
url_or_request = self._create_request(url_or_request, data, headers, query)
|
||||
filename = self._request_dump_filename(url_or_request.full_url, video_id)
|
||||
filename = self._request_dump_filename(url_or_request.url, video_id)
|
||||
self.to_screen(f'Loading request from {filename}')
|
||||
try:
|
||||
with open(filename, 'rb') as dumpf:
|
||||
@@ -1109,7 +1116,7 @@ class InfoExtractor:
|
||||
while True:
|
||||
try:
|
||||
return self.__download_webpage(url_or_request, video_id, note, errnote, None, fatal, *args, **kwargs)
|
||||
except http.client.IncompleteRead as e:
|
||||
except IncompleteRead as e:
|
||||
try_count += 1
|
||||
if try_count >= tries:
|
||||
raise e
|
||||
@@ -1680,7 +1687,7 @@ class InfoExtractor:
|
||||
def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
|
||||
"""Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
|
||||
rectx = re.escape(context_name)
|
||||
FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
|
||||
FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){.*?\breturn\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
|
||||
js, arg_keys, arg_vals = self._search_regex(
|
||||
(rf'<script>\s*window\.{rectx}={FUNCTION_RE}\s*\)\s*;?\s*</script>', rf'{rectx}\(.*?{FUNCTION_RE}'),
|
||||
webpage, context_name, group=('js', 'arg_keys', 'arg_vals'),
|
||||
@@ -1806,7 +1813,7 @@ class InfoExtractor:
|
||||
return []
|
||||
|
||||
manifest, urlh = res
|
||||
manifest_url = urlh.geturl()
|
||||
manifest_url = urlh.url
|
||||
|
||||
return self._parse_f4m_formats(
|
||||
manifest, manifest_url, video_id, preference=preference, quality=quality, f4m_id=f4m_id,
|
||||
@@ -1965,7 +1972,7 @@ class InfoExtractor:
|
||||
return [], {}
|
||||
|
||||
m3u8_doc, urlh = res
|
||||
m3u8_url = urlh.geturl()
|
||||
m3u8_url = urlh.url
|
||||
|
||||
return self._parse_m3u8_formats_and_subtitles(
|
||||
m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
|
||||
@@ -1979,11 +1986,7 @@ class InfoExtractor:
|
||||
errnote=None, fatal=True, data=None, headers={}, query={},
|
||||
video_id=None):
|
||||
formats, subtitles = [], {}
|
||||
|
||||
has_drm = re.search('|'.join([
|
||||
r'#EXT-X-FAXS-CM:', # Adobe Flash Access
|
||||
r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay
|
||||
]), m3u8_doc)
|
||||
has_drm = HlsFD._has_drm(m3u8_doc)
|
||||
|
||||
def format_url(url):
|
||||
return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url)
|
||||
@@ -2245,18 +2248,10 @@ class InfoExtractor:
|
||||
if res is False:
|
||||
assert not fatal
|
||||
return [], {}
|
||||
|
||||
smil, urlh = res
|
||||
smil_url = urlh.geturl()
|
||||
|
||||
namespace = self._parse_smil_namespace(smil)
|
||||
|
||||
fmts = self._parse_smil_formats(
|
||||
smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
|
||||
subs = self._parse_smil_subtitles(
|
||||
smil, namespace=namespace)
|
||||
|
||||
return fmts, subs
|
||||
return self._parse_smil_formats_and_subtitles(smil, urlh.url, video_id, f4m_params=f4m_params,
|
||||
namespace=self._parse_smil_namespace(smil))
|
||||
|
||||
def _extract_smil_formats(self, *args, **kwargs):
|
||||
fmts, subs = self._extract_smil_formats_and_subtitles(*args, **kwargs)
|
||||
@@ -2270,7 +2265,7 @@ class InfoExtractor:
|
||||
return {}
|
||||
|
||||
smil, urlh = res
|
||||
smil_url = urlh.geturl()
|
||||
smil_url = urlh.url
|
||||
|
||||
return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params)
|
||||
|
||||
@@ -2282,9 +2277,8 @@ class InfoExtractor:
|
||||
def _parse_smil(self, smil, smil_url, video_id, f4m_params=None):
|
||||
namespace = self._parse_smil_namespace(smil)
|
||||
|
||||
formats = self._parse_smil_formats(
|
||||
formats, subtitles = self._parse_smil_formats_and_subtitles(
|
||||
smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
|
||||
subtitles = self._parse_smil_subtitles(smil, namespace=namespace)
|
||||
|
||||
video_id = os.path.splitext(url_basename(smil_url))[0]
|
||||
title = None
|
||||
@@ -2323,7 +2317,14 @@ class InfoExtractor:
|
||||
return self._search_regex(
|
||||
r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None)
|
||||
|
||||
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
|
||||
def _parse_smil_formats(self, *args, **kwargs):
|
||||
fmts, subs = self._parse_smil_formats_and_subtitles(*args, **kwargs)
|
||||
if subs:
|
||||
self._report_ignoring_subs('SMIL')
|
||||
return fmts
|
||||
|
||||
def _parse_smil_formats_and_subtitles(
|
||||
self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
|
||||
base = smil_url
|
||||
for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
|
||||
b = meta.get('base') or meta.get('httpBase')
|
||||
@@ -2331,7 +2332,7 @@ class InfoExtractor:
|
||||
base = b
|
||||
break
|
||||
|
||||
formats = []
|
||||
formats, subtitles = [], {}
|
||||
rtmp_count = 0
|
||||
http_count = 0
|
||||
m3u8_count = 0
|
||||
@@ -2379,8 +2380,9 @@ class InfoExtractor:
|
||||
src_url = src_url.strip()
|
||||
|
||||
if proto == 'm3u8' or src_ext == 'm3u8':
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
m3u8_formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
|
||||
src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False)
|
||||
self._merge_subtitles(m3u8_subs, target=subtitles)
|
||||
if len(m3u8_formats) == 1:
|
||||
m3u8_count += 1
|
||||
m3u8_formats[0].update({
|
||||
@@ -2401,11 +2403,15 @@ class InfoExtractor:
|
||||
f4m_url += urllib.parse.urlencode(f4m_params)
|
||||
formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
|
||||
elif src_ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
src_url, video_id, mpd_id='dash', fatal=False))
|
||||
mpd_formats, mpd_subs = self._extract_mpd_formats_and_subtitles(
|
||||
src_url, video_id, mpd_id='dash', fatal=False)
|
||||
formats.extend(mpd_formats)
|
||||
self._merge_subtitles(mpd_subs, target=subtitles)
|
||||
elif re.search(r'\.ism/[Mm]anifest', src_url):
|
||||
formats.extend(self._extract_ism_formats(
|
||||
src_url, video_id, ism_id='mss', fatal=False))
|
||||
ism_formats, ism_subs = self._extract_ism_formats_and_subtitles(
|
||||
src_url, video_id, ism_id='mss', fatal=False)
|
||||
formats.extend(ism_formats)
|
||||
self._merge_subtitles(ism_subs, target=subtitles)
|
||||
elif src_url.startswith('http') and self._is_valid_url(src, video_id):
|
||||
http_count += 1
|
||||
formats.append({
|
||||
@@ -2436,7 +2442,10 @@ class InfoExtractor:
|
||||
'format_note': 'SMIL storyboards',
|
||||
})
|
||||
|
||||
return formats
|
||||
smil_subs = self._parse_smil_subtitles(smil, namespace=namespace)
|
||||
self._merge_subtitles(smil_subs, target=subtitles)
|
||||
|
||||
return formats, subtitles
|
||||
|
||||
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
|
||||
urls = []
|
||||
@@ -2462,7 +2471,7 @@ class InfoExtractor:
|
||||
return []
|
||||
|
||||
xspf, urlh = res
|
||||
xspf_url = urlh.geturl()
|
||||
xspf_url = urlh.url
|
||||
|
||||
return self._parse_xspf(
|
||||
xspf, playlist_id, xspf_url=xspf_url,
|
||||
@@ -2533,7 +2542,7 @@ class InfoExtractor:
|
||||
return [], {}
|
||||
|
||||
# We could have been redirected to a new url when we retrieved our mpd file.
|
||||
mpd_url = urlh.geturl()
|
||||
mpd_url = urlh.url
|
||||
mpd_base_url = base_url(mpd_url)
|
||||
|
||||
return self._parse_mpd_formats_and_subtitles(
|
||||
@@ -2919,7 +2928,7 @@ class InfoExtractor:
|
||||
if ism_doc is None:
|
||||
return [], {}
|
||||
|
||||
return self._parse_ism_formats_and_subtitles(ism_doc, urlh.geturl(), ism_id)
|
||||
return self._parse_ism_formats_and_subtitles(ism_doc, urlh.url, ism_id)
|
||||
|
||||
def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
|
||||
"""
|
||||
|
||||
@@ -41,7 +41,7 @@ class CorusIE(ThePlatformFeedIE): # XXX: Do not subclass from concrete IE
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/',
|
||||
'url': 'https://www.hgtv.ca/video/bryan-inc/movie-night-popcorn-with-bryan/870923331648/',
|
||||
'info_dict': {
|
||||
'id': '870923331648',
|
||||
'ext': 'mp4',
|
||||
@@ -54,6 +54,7 @@ class CorusIE(ThePlatformFeedIE): # XXX: Do not subclass from concrete IE
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Failed to parse JSON'],
|
||||
# FIXME: yt-dlp wrongly raises for geo restriction
|
||||
}, {
|
||||
'url': 'http://www.foodnetwork.ca/shows/chopped/video/episode/chocolate-obsession/video.html?v=872683587753',
|
||||
'only_matching': True,
|
||||
|
||||
@@ -4,7 +4,7 @@ import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
@@ -113,7 +113,7 @@ class CrackleIE(InfoExtractor):
|
||||
errnote='Unable to download media JSON')
|
||||
except ExtractorError as e:
|
||||
# 401 means geo restriction, trying next country
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
|
||||
continue
|
||||
raise
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import base64
|
||||
import urllib.error
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
@@ -27,11 +27,24 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
_AUTH_HEADERS = None
|
||||
_API_ENDPOINT = None
|
||||
_BASIC_AUTH = None
|
||||
_QUERY = {}
|
||||
_CLIENT_ID = ('cr_web', 'noaihdevm_6iyg0a8l0q')
|
||||
_LOCALE_LOOKUP = {
|
||||
'ar': 'ar-SA',
|
||||
'de': 'de-DE',
|
||||
'': 'en-US',
|
||||
'es': 'es-419',
|
||||
'es-es': 'es-ES',
|
||||
'fr': 'fr-FR',
|
||||
'it': 'it-IT',
|
||||
'pt-br': 'pt-BR',
|
||||
'pt-pt': 'pt-PT',
|
||||
'ru': 'ru-RU',
|
||||
'hi': 'hi-IN',
|
||||
}
|
||||
|
||||
@property
|
||||
def is_logged_in(self):
|
||||
return self._get_cookies(self._BASE_URL).get('etp_rt')
|
||||
return bool(self._get_cookies(self._BASE_URL).get('etp_rt'))
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
if self.is_logged_in:
|
||||
@@ -62,49 +75,49 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
if not self.is_logged_in:
|
||||
raise ExtractorError('Login succeeded but did not set etp_rt cookie')
|
||||
|
||||
def _update_query(self, lang):
|
||||
if lang in CrunchyrollBaseIE._QUERY:
|
||||
return
|
||||
|
||||
webpage = self._download_webpage(
|
||||
f'{self._BASE_URL}/{lang}', None, note=f'Retrieving main page (lang={lang or None})')
|
||||
|
||||
initial_state = self._search_json(r'__INITIAL_STATE__\s*=', webpage, 'initial state', None)
|
||||
CrunchyrollBaseIE._QUERY[lang] = traverse_obj(initial_state, {
|
||||
'locale': ('localization', 'locale'),
|
||||
}) or None
|
||||
|
||||
if CrunchyrollBaseIE._BASIC_AUTH:
|
||||
return
|
||||
|
||||
app_config = self._search_json(r'__APP_CONFIG__\s*=', webpage, 'app config', None)
|
||||
cx_api_param = app_config['cxApiParams']['accountAuthClientId' if self.is_logged_in else 'anonClientId']
|
||||
self.write_debug(f'Using cxApiParam={cx_api_param}')
|
||||
CrunchyrollBaseIE._BASIC_AUTH = 'Basic ' + base64.b64encode(f'{cx_api_param}:'.encode()).decode()
|
||||
|
||||
def _update_auth(self):
|
||||
if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_REFRESH > time_seconds():
|
||||
return
|
||||
|
||||
assert CrunchyrollBaseIE._BASIC_AUTH, '_update_query needs to be called at least one time beforehand'
|
||||
if not CrunchyrollBaseIE._BASIC_AUTH:
|
||||
cx_api_param = self._CLIENT_ID[self.is_logged_in]
|
||||
self.write_debug(f'Using cxApiParam={cx_api_param}')
|
||||
CrunchyrollBaseIE._BASIC_AUTH = 'Basic ' + base64.b64encode(f'{cx_api_param}:'.encode()).decode()
|
||||
|
||||
grant_type = 'etp_rt_cookie' if self.is_logged_in else 'client_id'
|
||||
auth_response = self._download_json(
|
||||
f'{self._BASE_URL}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
|
||||
headers={'Authorization': CrunchyrollBaseIE._BASIC_AUTH}, data=f'grant_type={grant_type}'.encode())
|
||||
try:
|
||||
auth_response = self._download_json(
|
||||
f'{self._BASE_URL}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
|
||||
headers={'Authorization': CrunchyrollBaseIE._BASIC_AUTH}, data=f'grant_type={grant_type}'.encode())
|
||||
except ExtractorError as error:
|
||||
if isinstance(error.cause, HTTPError) and error.cause.status == 403:
|
||||
raise ExtractorError(
|
||||
'Request blocked by Cloudflare; navigate to Crunchyroll in your browser, '
|
||||
'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
|
||||
'and your browser\'s User-Agent (with --user-agent)', expected=True)
|
||||
raise
|
||||
|
||||
CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']}
|
||||
CrunchyrollBaseIE._AUTH_REFRESH = time_seconds(seconds=traverse_obj(auth_response, ('expires_in', {float_or_none}), default=300) - 10)
|
||||
|
||||
def _locale_from_language(self, language):
|
||||
config_locale = self._configuration_arg('metadata', ie_key=CrunchyrollBetaIE, casesense=True)
|
||||
return config_locale[0] if config_locale else self._LOCALE_LOOKUP.get(language)
|
||||
|
||||
def _call_base_api(self, endpoint, internal_id, lang, note=None, query={}):
|
||||
self._update_query(lang)
|
||||
self._update_auth()
|
||||
|
||||
if not endpoint.startswith('/'):
|
||||
endpoint = f'/{endpoint}'
|
||||
|
||||
query = query.copy()
|
||||
locale = self._locale_from_language(lang)
|
||||
if locale:
|
||||
query['locale'] = locale
|
||||
|
||||
return self._download_json(
|
||||
f'{self._BASE_URL}{endpoint}', internal_id, note or f'Calling API: {endpoint}',
|
||||
headers=CrunchyrollBaseIE._AUTH_HEADERS, query={**CrunchyrollBaseIE._QUERY[lang], **query})
|
||||
headers=CrunchyrollBaseIE._AUTH_HEADERS, query=query)
|
||||
|
||||
def _call_api(self, path, internal_id, lang, note='api', query={}):
|
||||
if not path.startswith(f'/content/v2/{self._API_ENDPOINT}/'):
|
||||
@@ -114,7 +127,7 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
result = self._call_base_api(
|
||||
path, internal_id, lang, f'Downloading {note} JSON ({self._API_ENDPOINT})', query=query)
|
||||
except ExtractorError as error:
|
||||
if isinstance(error.cause, urllib.error.HTTPError) and error.cause.code == 404:
|
||||
if isinstance(error.cause, HTTPError) and error.cause.status == 404:
|
||||
return None
|
||||
raise
|
||||
|
||||
@@ -206,7 +219,7 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
|
||||
IE_NAME = 'crunchyroll'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:beta\.|www\.)?crunchyroll\.com/
|
||||
(?P<lang>(?:\w{2}(?:-\w{2})?/)?)
|
||||
(?:(?P<lang>\w{2}(?:-\w{2})?)/)?
|
||||
watch/(?!concert|musicvideo)(?P<id>\w+)'''
|
||||
_TESTS = [{
|
||||
# Premium only
|
||||
@@ -304,7 +317,7 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
|
||||
},
|
||||
'playlist_mincount': 5,
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y',
|
||||
'url': 'https://www.crunchyroll.com/de/watch/GY2P1Q98Y',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://beta.crunchyroll.com/pt-br/watch/G8WUN8VKP/the-ruler-of-conspiracy',
|
||||
@@ -490,8 +503,21 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?crunchyroll\.com/
|
||||
(?P<lang>(?:\w{2}(?:-\w{2})?/)?)
|
||||
watch/(?P<type>concert|musicvideo)/(?P<id>\w{10})'''
|
||||
watch/(?P<type>concert|musicvideo)/(?P<id>\w+)'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.crunchyroll.com/de/watch/musicvideo/MV5B02C79',
|
||||
'info_dict': {
|
||||
'ext': 'mp4',
|
||||
'id': 'MV5B02C79',
|
||||
'display_id': 'egaono-hana',
|
||||
'title': 'Egaono Hana',
|
||||
'track': 'Egaono Hana',
|
||||
'artist': 'Goose house',
|
||||
'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
|
||||
'genre': ['J-Pop'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C',
|
||||
'info_dict': {
|
||||
'ext': 'mp4',
|
||||
@@ -519,11 +545,14 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C/crossing-field',
|
||||
'url': 'https://www.crunchyroll.com/de/watch/musicvideo/MV5B02C79/egaono-hana',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/watch/concert/MC2E2AC135/live-is-smile-always-364joker-at-yokohama-arena',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C/crossing-field',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_API_ENDPOINT = 'music'
|
||||
|
||||
|
||||
@@ -1,10 +1,8 @@
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
HEADRequest,
|
||||
)
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class CultureUnpluggedIE(InfoExtractor):
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
import hashlib
|
||||
import re
|
||||
import time
|
||||
import urllib.error
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
classproperty,
|
||||
@@ -105,7 +105,7 @@ class DacastVODIE(DacastBaseIE):
|
||||
formats = self._extract_m3u8_formats(hls_url, video_id, 'mp4', m3u8_id='hls')
|
||||
except ExtractorError as e:
|
||||
# CDN will randomly respond with 403
|
||||
if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 403:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
|
||||
retry.error = e
|
||||
continue
|
||||
raise
|
||||
|
||||
@@ -3,7 +3,7 @@ import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
@@ -68,9 +68,9 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
|
||||
None, 'Downloading Access Token',
|
||||
data=urlencode_postdata(data))['access_token']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 400:
|
||||
raise ExtractorError(self._parse_json(
|
||||
e.cause.read().decode(), xid)['error_description'], expected=True)
|
||||
e.cause.response.read().decode(), xid)['error_description'], expected=True)
|
||||
raise
|
||||
self._set_dailymotion_cookie('access_token' if username else 'client_token', token)
|
||||
self._HEADERS['Authorization'] = 'Bearer ' + token
|
||||
|
||||
@@ -3,8 +3,8 @@ import string
|
||||
|
||||
from .discoverygo import DiscoveryGoBaseIE
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import ExtractorError
|
||||
from ..compat import compat_HTTPError
|
||||
|
||||
|
||||
class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
@@ -100,9 +100,9 @@ class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
self._API_BASE_URL + 'streaming/video/' + video_id,
|
||||
display_id, 'Downloading streaming JSON metadata', headers=headers)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403):
|
||||
e_description = self._parse_json(
|
||||
e.cause.read().decode(), display_id)['description']
|
||||
e.cause.response.read().decode(), display_id)['description']
|
||||
if 'resource not available for country' in e_description:
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||
if 'Authorized Networks' in e_description:
|
||||
|
||||
@@ -1,31 +1,72 @@
|
||||
import time
|
||||
import hashlib
|
||||
import re
|
||||
import urllib
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .openload import PhantomJSwrapper
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
UserNotLive,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
parse_resolution,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class DouyuTVIE(InfoExtractor):
|
||||
IE_DESC = '斗鱼'
|
||||
class DouyuBaseIE(InfoExtractor):
|
||||
def _download_cryptojs_md5(self, video_id):
|
||||
for url in [
|
||||
'https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
|
||||
'https://cdn.bootcdn.net/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
|
||||
]:
|
||||
js_code = self._download_webpage(
|
||||
url, video_id, note='Downloading signing dependency', fatal=False)
|
||||
if js_code:
|
||||
self.cache.store('douyu', 'crypto-js-md5', js_code)
|
||||
return js_code
|
||||
raise ExtractorError('Unable to download JS dependency (crypto-js/md5)')
|
||||
|
||||
def _get_cryptojs_md5(self, video_id):
|
||||
return self.cache.load('douyu', 'crypto-js-md5') or self._download_cryptojs_md5(video_id)
|
||||
|
||||
def _calc_sign(self, sign_func, video_id, a):
|
||||
b = uuid.uuid4().hex
|
||||
c = round(time.time())
|
||||
js_script = f'{self._get_cryptojs_md5(video_id)};{sign_func};console.log(ub98484234("{a}","{b}","{c}"))'
|
||||
phantom = PhantomJSwrapper(self)
|
||||
result = phantom.execute(js_script, video_id,
|
||||
note='Executing JS signing script').strip()
|
||||
return {i: v[0] for i, v in urllib.parse.parse_qs(result).items()}
|
||||
|
||||
def _search_js_sign_func(self, webpage, fatal=True):
|
||||
# The greedy look-behind ensures last possible script tag is matched
|
||||
return self._search_regex(
|
||||
r'(?:<script.*)?<script[^>]*>(.*?ub98484234.*?)</script>', webpage, 'JS sign func', fatal=fatal)
|
||||
|
||||
|
||||
class DouyuTVIE(DouyuBaseIE):
|
||||
IE_DESC = '斗鱼直播'
|
||||
_VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(topic/\w+\?rid=|(?:[^/]+/))*(?P<id>[A-Za-z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.douyutv.com/iseven',
|
||||
'url': 'https://www.douyu.com/pigff',
|
||||
'info_dict': {
|
||||
'id': '17732',
|
||||
'display_id': 'iseven',
|
||||
'ext': 'flv',
|
||||
'title': 're:^清晨醒脑!根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'description': r're:.*m7show@163\.com.*',
|
||||
'thumbnail': r're:^https?://.*\.png',
|
||||
'uploader': '7师傅',
|
||||
'id': '24422',
|
||||
'display_id': 'pigff',
|
||||
'ext': 'mp4',
|
||||
'title': 're:^【PIGFF】.* [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'description': r'≥15级牌子看鱼吧置顶帖进粉丝vx群',
|
||||
'thumbnail': str,
|
||||
'uploader': 'pigff',
|
||||
'is_live': True,
|
||||
'live_status': 'is_live',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -85,15 +126,43 @@ class DouyuTVIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _get_sign_func(self, room_id, video_id):
|
||||
return self._download_json(
|
||||
f'https://www.douyu.com/swf_api/homeH5Enc?rids={room_id}', video_id,
|
||||
note='Getting signing script')['data'][f'room{room_id}']
|
||||
|
||||
def _extract_stream_formats(self, stream_formats):
|
||||
formats = []
|
||||
for stream_info in traverse_obj(stream_formats, (..., 'data')):
|
||||
stream_url = urljoin(
|
||||
traverse_obj(stream_info, 'rtmp_url'), traverse_obj(stream_info, 'rtmp_live'))
|
||||
if stream_url:
|
||||
rate_id = traverse_obj(stream_info, ('rate', {int_or_none}))
|
||||
rate_info = traverse_obj(stream_info, ('multirates', lambda _, v: v['rate'] == rate_id), get_all=False)
|
||||
ext = determine_ext(stream_url)
|
||||
formats.append({
|
||||
'url': stream_url,
|
||||
'format_id': str_or_none(rate_id),
|
||||
'ext': 'mp4' if ext == 'm3u8' else ext,
|
||||
'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
|
||||
'quality': rate_id % -10000 if rate_id is not None else None,
|
||||
**traverse_obj(rate_info, {
|
||||
'format': ('name', {str_or_none}),
|
||||
'tbr': ('bit', {int_or_none}),
|
||||
}),
|
||||
})
|
||||
return formats
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
if video_id.isdigit():
|
||||
room_id = video_id
|
||||
else:
|
||||
page = self._download_webpage(url, video_id)
|
||||
room_id = self._html_search_regex(
|
||||
r'"room_id\\?"\s*:\s*(\d+),', page, 'room id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
room_id = self._search_regex(r'\$ROOM\.room_id\s*=\s*(\d+)', webpage, 'room id')
|
||||
|
||||
if self._search_regex(r'"videoLoop"\s*:\s*(\d+)', webpage, 'loop', default='') == '1':
|
||||
raise UserNotLive('The channel is auto-playing VODs', video_id=video_id)
|
||||
if self._search_regex(r'\$ROOM\.show_status\s*=\s*(\d+)', webpage, 'status', default='') == '2':
|
||||
raise UserNotLive(video_id=video_id)
|
||||
|
||||
# Grab metadata from API
|
||||
params = {
|
||||
@@ -102,110 +171,136 @@ class DouyuTVIE(InfoExtractor):
|
||||
'time': int(time.time()),
|
||||
}
|
||||
params['auth'] = hashlib.md5(
|
||||
f'room/{video_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'.encode()).hexdigest()
|
||||
room = self._download_json(
|
||||
f'room/{room_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'.encode()).hexdigest()
|
||||
room = traverse_obj(self._download_json(
|
||||
f'http://www.douyutv.com/api/v1/room/{room_id}', video_id,
|
||||
note='Downloading room info', query=params)['data']
|
||||
note='Downloading room info', query=params, fatal=False), 'data')
|
||||
|
||||
# 1 = live, 2 = offline
|
||||
if room.get('show_status') == '2':
|
||||
raise ExtractorError('Live stream is offline', expected=True)
|
||||
if traverse_obj(room, 'show_status') == '2':
|
||||
raise UserNotLive(video_id=video_id)
|
||||
|
||||
video_url = urljoin('https://hls3-akm.douyucdn.cn/', self._search_regex(r'(live/.*)', room['hls_url'], 'URL'))
|
||||
formats, subs = self._extract_m3u8_formats_and_subtitles(video_url, room_id)
|
||||
js_sign_func = self._search_js_sign_func(webpage, fatal=False) or self._get_sign_func(room_id, video_id)
|
||||
form_data = {
|
||||
'rate': 0,
|
||||
**self._calc_sign(js_sign_func, video_id, room_id),
|
||||
}
|
||||
stream_formats = [self._download_json(
|
||||
f'https://www.douyu.com/lapi/live/getH5Play/{room_id}',
|
||||
video_id, note="Downloading livestream format",
|
||||
data=urlencode_postdata(form_data))]
|
||||
|
||||
title = unescapeHTML(room['room_name'])
|
||||
description = room.get('show_details')
|
||||
thumbnail = room.get('room_src')
|
||||
uploader = room.get('nickname')
|
||||
for rate_id in traverse_obj(stream_formats[0], ('data', 'multirates', ..., 'rate')):
|
||||
if rate_id != traverse_obj(stream_formats[0], ('data', 'rate')):
|
||||
form_data['rate'] = rate_id
|
||||
stream_formats.append(self._download_json(
|
||||
f'https://www.douyu.com/lapi/live/getH5Play/{room_id}',
|
||||
video_id, note=f'Downloading livestream format {rate_id}',
|
||||
data=urlencode_postdata(form_data)))
|
||||
|
||||
return {
|
||||
'id': room_id,
|
||||
'display_id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'formats': self._extract_stream_formats(stream_formats),
|
||||
'is_live': True,
|
||||
'subtitles': subs,
|
||||
'formats': formats,
|
||||
**traverse_obj(room, {
|
||||
'display_id': ('url', {str}, {lambda i: i[1:]}),
|
||||
'title': ('room_name', {unescapeHTML}),
|
||||
'description': ('show_details', {str}),
|
||||
'uploader': ('nickname', {str}),
|
||||
'thumbnail': ('room_src', {url_or_none}),
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
class DouyuShowIE(InfoExtractor):
|
||||
class DouyuShowIE(DouyuBaseIE):
|
||||
_VALID_URL = r'https?://v(?:mobile)?\.douyu\.com/show/(?P<id>[0-9a-zA-Z]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://v.douyu.com/show/rjNBdvnVXNzvE2yw',
|
||||
'md5': '0c2cfd068ee2afe657801269b2d86214',
|
||||
'url': 'https://v.douyu.com/show/mPyq7oVNe5Yv1gLY',
|
||||
'info_dict': {
|
||||
'id': 'rjNBdvnVXNzvE2yw',
|
||||
'id': 'mPyq7oVNe5Yv1gLY',
|
||||
'ext': 'mp4',
|
||||
'title': '陈一发儿:砒霜 我有个室友系列!04-01 22点场',
|
||||
'duration': 7150.08,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': '陈一发儿',
|
||||
'uploader_id': 'XrZwYelr5wbK',
|
||||
'uploader_url': 'https://v.douyu.com/author/XrZwYelr5wbK',
|
||||
'upload_date': '20170402',
|
||||
'title': '四川人小时候的味道“蒜苗回锅肉”,传统菜不能丢,要常做来吃',
|
||||
'duration': 633,
|
||||
'thumbnail': str,
|
||||
'uploader': '美食作家王刚V',
|
||||
'uploader_id': 'OVAO4NVx1m7Q',
|
||||
'timestamp': 1661850002,
|
||||
'upload_date': '20220830',
|
||||
'view_count': int,
|
||||
'tags': ['美食', '美食综合'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://vmobile.douyu.com/show/rjNBdvnVXNzvE2yw',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_FORMATS = {
|
||||
'super': '原画',
|
||||
'high': '超清',
|
||||
'normal': '高清',
|
||||
}
|
||||
|
||||
_QUALITIES = {
|
||||
'super': -1,
|
||||
'high': -2,
|
||||
'normal': -3,
|
||||
}
|
||||
|
||||
_RESOLUTIONS = {
|
||||
'super': '1920x1080',
|
||||
'high': '1280x720',
|
||||
'normal': '852x480',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
url = url.replace('vmobile.', 'v.')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
room_info = self._parse_json(self._search_regex(
|
||||
r'var\s+\$ROOM\s*=\s*({.+});', webpage, 'room info'), video_id)
|
||||
video_info = self._search_json(
|
||||
r'<script>\s*window\.\$DATA\s*=', webpage,
|
||||
'video info', video_id, transform_source=js_to_json)
|
||||
|
||||
video_info = None
|
||||
js_sign_func = self._search_js_sign_func(webpage)
|
||||
form_data = {
|
||||
'vid': video_id,
|
||||
**self._calc_sign(js_sign_func, video_id, video_info['ROOM']['point_id']),
|
||||
}
|
||||
url_info = self._download_json(
|
||||
'https://v.douyu.com/api/stream/getStreamUrl', video_id,
|
||||
data=urlencode_postdata(form_data), note="Downloading video formats")
|
||||
|
||||
for trial in range(5):
|
||||
# Sometimes Douyu rejects our request. Let's try it more times
|
||||
try:
|
||||
video_info = self._download_json(
|
||||
'https://vmobile.douyu.com/video/getInfo', video_id,
|
||||
query={'vid': video_id},
|
||||
headers={
|
||||
'Referer': url,
|
||||
'x-requested-with': 'XMLHttpRequest',
|
||||
})
|
||||
break
|
||||
except ExtractorError:
|
||||
self._sleep(1, video_id)
|
||||
|
||||
if not video_info:
|
||||
raise ExtractorError('Can\'t fetch video info')
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_info['data']['video_url'], video_id,
|
||||
entry_protocol='m3u8_native', ext='mp4')
|
||||
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
r'<em>上传时间:</em><span>([^<]+)</span>', webpage,
|
||||
'upload date', fatal=False))
|
||||
|
||||
uploader = uploader_id = uploader_url = None
|
||||
mobj = re.search(
|
||||
r'(?m)<a[^>]+href="/author/([0-9a-zA-Z]+)".+?<strong[^>]+title="([^"]+)"',
|
||||
webpage)
|
||||
if mobj:
|
||||
uploader_id, uploader = mobj.groups()
|
||||
uploader_url = urljoin(url, '/author/' + uploader_id)
|
||||
formats = []
|
||||
for name, url in traverse_obj(url_info, ('data', 'thumb_video', {dict.items}, ...)):
|
||||
video_url = traverse_obj(url, ('url', {url_or_none}))
|
||||
if video_url:
|
||||
ext = determine_ext(video_url)
|
||||
formats.append({
|
||||
'format': self._FORMATS.get(name),
|
||||
'format_id': name,
|
||||
'url': video_url,
|
||||
'quality': self._QUALITIES.get(name),
|
||||
'ext': 'mp4' if ext == 'm3u8' else ext,
|
||||
'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
|
||||
**parse_resolution(self._RESOLUTIONS.get(name))
|
||||
})
|
||||
else:
|
||||
self.to_screen(
|
||||
f'"{self._FORMATS.get(name, name)}" format may require logging in. {self._login_hint()}')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': room_info['name'],
|
||||
'formats': formats,
|
||||
'duration': room_info.get('duration'),
|
||||
'thumbnail': room_info.get('pic'),
|
||||
'upload_date': upload_date,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'uploader_url': uploader_url,
|
||||
**traverse_obj(video_info, ('DATA', {
|
||||
'title': ('content', 'title', {str}),
|
||||
'uploader': ('content', 'author', {str}),
|
||||
'uploader_id': ('content', 'up_id', {str_or_none}),
|
||||
'duration': ('content', 'video_duration', {int_or_none}),
|
||||
'thumbnail': ('content', 'video_pic', {url_or_none}),
|
||||
'timestamp': ('content', 'create_time', {int_or_none}),
|
||||
'view_count': ('content', 'view_num', {int_or_none}),
|
||||
'tags': ('videoTag', ..., 'tagName', {str}),
|
||||
}))
|
||||
}
|
||||
|
||||
@@ -2,7 +2,7 @@ import json
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
@@ -39,7 +39,7 @@ class DPlayBaseIE(InfoExtractor):
|
||||
return f'Bearer {token}'
|
||||
|
||||
def _process_errors(self, e, geo_countries):
|
||||
info = self._parse_json(e.cause.read().decode('utf-8'), None)
|
||||
info = self._parse_json(e.cause.response.read().decode('utf-8'), None)
|
||||
error = info['errors'][0]
|
||||
error_code = error.get('code')
|
||||
if error_code == 'access.denied.geoblocked':
|
||||
@@ -87,7 +87,7 @@ class DPlayBaseIE(InfoExtractor):
|
||||
'include': 'images,primaryChannel,show,tags'
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 400:
|
||||
self._process_errors(e, geo_countries)
|
||||
raise
|
||||
video_id = video['data']['id']
|
||||
@@ -99,7 +99,7 @@ class DPlayBaseIE(InfoExtractor):
|
||||
streaming = self._download_video_playback_info(
|
||||
disco_base, video_id, headers)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
|
||||
self._process_errors(e, geo_countries)
|
||||
raise
|
||||
for format_dict in streaming:
|
||||
@@ -746,7 +746,7 @@ class MotorTrendIE(DiscoveryPlusBaseIE):
|
||||
|
||||
|
||||
class MotorTrendOnDemandIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?motortrendondemand\.com/detail' + DPlayBaseIE._PATH_REGEX
|
||||
_VALID_URL = r'https?://(?:www\.)?motortrend(?:ondemand\.com|\.com/plus)/detail' + DPlayBaseIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://www.motortrendondemand.com/detail/wheelstanding-dump-truck-stubby-bobs-comeback/37699/784',
|
||||
'info_dict': {
|
||||
@@ -767,6 +767,25 @@ class MotorTrendOnDemandIE(DiscoveryPlusBaseIE):
|
||||
'upload_date': '20140101',
|
||||
'tags': [],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.motortrend.com/plus/detail/roadworthy-rescues-teaser-trailer/4922860/',
|
||||
'info_dict': {
|
||||
'id': '4922860',
|
||||
'ext': 'mp4',
|
||||
'title': 'Roadworthy Rescues | Teaser Trailer',
|
||||
'description': 'Derek Bieri helps Freiburger and Finnegan with their \'68 big-block Dart.',
|
||||
'display_id': 'roadworthy-rescues-teaser-trailer/4922860',
|
||||
'creator': 'Originals',
|
||||
'series': 'Roadworthy Rescues',
|
||||
'thumbnail': r're:^https?://.+\.jpe?g$',
|
||||
'upload_date': '20220907',
|
||||
'timestamp': 1662523200,
|
||||
'duration': 1066.356,
|
||||
'tags': [],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.motortrend.com/plus/detail/ugly-duckling/2450033/12439',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_PRODUCT = 'MTOD'
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import base64
|
||||
import os.path
|
||||
import re
|
||||
|
||||
@@ -5,14 +6,13 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
update_url_query,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
class DropboxIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dropbox[.]com/sh?/(?P<id>[a-zA-Z0-9]{15})/.*'
|
||||
_VALID_URL = r'https?://(?:www\.)?dropbox\.com/(?:(?:e/)?scl/fi|sh?)/(?P<id>\w+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',
|
||||
@@ -22,7 +22,16 @@ class DropboxIE(InfoExtractor):
|
||||
'title': 'youtube-dl test video \'ä"BaW_jenozKc'
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.dropbox.com/sh/662glsejgzoj9sr/AAByil3FGH9KFNZ13e08eSa1a/Pregame%20Ceremony%20Program%20PA%2020140518.m4v',
|
||||
'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.dropbox.com/sh/2mgpiuq7kv8nqdf/AABy-fW4dkydT4GmWi2mdOUDa?dl=0&preview=Drone+Shot.mp4',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.dropbox.com/scl/fi/r2kd2skcy5ylbbta5y1pz/DJI_0003.MP4?dl=0&rlkey=wcdgqangn7t3lnmmv6li9mu9h',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.dropbox.com/e/scl/fi/r2kd2skcy5ylbbta5y1pz/DJI_0003.MP4?dl=0&rlkey=wcdgqangn7t3lnmmv6li9mu9h',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
@@ -53,16 +62,25 @@ class DropboxIE(InfoExtractor):
|
||||
else:
|
||||
raise ExtractorError('Password protected video, use --video-password <password>', expected=True)
|
||||
|
||||
info_json = self._search_json(r'InitReact\.mountComponent\(.*?,', webpage, 'mountComponent', video_id,
|
||||
contains_pattern=r'{.+?"preview".+?}', end_pattern=r'\)')['props']
|
||||
transcode_url = traverse_obj(info_json, ((None, 'preview'), 'file', 'preview', 'content', 'transcode_url'), get_all=False)
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id)
|
||||
formats, subtitles, has_anonymous_download = [], {}, False
|
||||
for encoded in reversed(re.findall(r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage)):
|
||||
decoded = base64.b64decode(encoded).decode('utf-8', 'ignore')
|
||||
transcode_url = self._search_regex(
|
||||
r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', decoded, 'transcode url', default=None)
|
||||
if not transcode_url:
|
||||
continue
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id, 'mp4')
|
||||
has_anonymous_download = self._search_regex(r'(anonymous:\tanonymous)', decoded, 'anonymous', default=False)
|
||||
break
|
||||
|
||||
# downloads enabled we can get the original file
|
||||
if 'anonymous' in (try_get(info_json, lambda x: x['sharePermission']['canDownloadRoles']) or []):
|
||||
video_url = re.sub(r'[?&]dl=0', '', url)
|
||||
video_url += ('?' if '?' not in video_url else '&') + 'dl=1'
|
||||
formats.append({'url': video_url, 'format_id': 'original', 'format_note': 'Original', 'quality': 1})
|
||||
if has_anonymous_download:
|
||||
formats.append({
|
||||
'url': update_url_query(url, {'dl': '1'}),
|
||||
'format_id': 'original',
|
||||
'format_note': 'Original',
|
||||
'quality': 1
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -2,7 +2,7 @@ import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
@@ -111,8 +111,8 @@ class EaglePlatformIE(InfoExtractor):
|
||||
response = super(EaglePlatformIE, self)._download_json(
|
||||
url_or_request, video_id, *args, **kwargs)
|
||||
except ExtractorError as ee:
|
||||
if isinstance(ee.cause, compat_HTTPError):
|
||||
response = self._parse_json(ee.cause.read().decode('utf-8'), video_id)
|
||||
if isinstance(ee.cause, HTTPError):
|
||||
response = self._parse_json(ee.cause.response.read().decode('utf-8'), video_id)
|
||||
self._handle_error(response)
|
||||
raise
|
||||
return response
|
||||
|
||||
@@ -1,10 +1,6 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
)
|
||||
from ..networking import Request
|
||||
from ..utils import float_or_none, int_or_none, parse_iso8601
|
||||
|
||||
|
||||
class EitbIE(InfoExtractor):
|
||||
@@ -54,7 +50,7 @@ class EitbIE(InfoExtractor):
|
||||
|
||||
hls_url = media.get('HLS_SURL')
|
||||
if hls_url:
|
||||
request = sanitized_Request(
|
||||
request = Request(
|
||||
'http://mam.eitb.eus/mam/REST/ServiceMultiweb/DomainRestrictedSecurity/TokenAuth/',
|
||||
headers={'Referer': url})
|
||||
token_data = self._download_json(
|
||||
|
||||
96
yt_dlp/extractor/eplus.py
Normal file
96
yt_dlp/extractor/eplus.py
Normal file
@@ -0,0 +1,96 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
try_call,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class EplusIbIE(InfoExtractor):
|
||||
IE_NAME = 'eplus:inbound'
|
||||
IE_DESC = 'e+ (イープラス) overseas'
|
||||
_VALID_URL = r'https?://live\.eplus\.jp/ex/player\?ib=(?P<id>(?:\w|%2B|%2F){86}%3D%3D)'
|
||||
_TESTS = [{
|
||||
'url': 'https://live.eplus.jp/ex/player?ib=YEFxb3Vyc2Dombnjg7blkrLlrablnJLjgrnjgq%2Fjg7zjg6vjgqLjgqTjg4njg6vlkIzlpb3kvJpgTGllbGxhIQ%3D%3D',
|
||||
'info_dict': {
|
||||
'id': '354502-0001-002',
|
||||
'title': 'LoveLive!Series Presents COUNTDOWN LoveLive! 2021→2022~LIVE with a smile!~【Streaming+(配信)】',
|
||||
'live_status': 'was_live',
|
||||
'release_date': '20211231',
|
||||
'release_timestamp': 1640952000,
|
||||
'description': str,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'ignore_no_formats_error': True,
|
||||
},
|
||||
'expected_warnings': [
|
||||
'Could not find the playlist URL. This event may not be accessible',
|
||||
'No video formats found!',
|
||||
'Requested format is not available',
|
||||
],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
data_json = self._search_json(r'<script>\s*var app\s*=', webpage, 'data json', video_id)
|
||||
|
||||
delivery_status = data_json.get('delivery_status')
|
||||
archive_mode = data_json.get('archive_mode')
|
||||
release_timestamp = try_call(lambda: unified_timestamp(data_json['event_datetime']) - 32400)
|
||||
release_timestamp_str = data_json.get('event_datetime_text') # JST
|
||||
|
||||
self.write_debug(f'delivery_status = {delivery_status}, archive_mode = {archive_mode}')
|
||||
|
||||
if delivery_status == 'PREPARING':
|
||||
live_status = 'is_upcoming'
|
||||
elif delivery_status == 'STARTED':
|
||||
live_status = 'is_live'
|
||||
elif delivery_status == 'STOPPED':
|
||||
if archive_mode != 'ON':
|
||||
raise ExtractorError(
|
||||
'This event has ended and there is no archive for this event', expected=True)
|
||||
live_status = 'post_live'
|
||||
elif delivery_status == 'WAIT_CONFIRM_ARCHIVED':
|
||||
live_status = 'post_live'
|
||||
elif delivery_status == 'CONFIRMED_ARCHIVE':
|
||||
live_status = 'was_live'
|
||||
else:
|
||||
self.report_warning(f'Unknown delivery_status {delivery_status}, treat it as a live')
|
||||
live_status = 'is_live'
|
||||
|
||||
formats = []
|
||||
|
||||
m3u8_playlist_urls = self._search_json(
|
||||
r'var listChannels\s*=', webpage, 'hls URLs', video_id, contains_pattern=r'\[.+\]', default=[])
|
||||
if not m3u8_playlist_urls:
|
||||
if live_status == 'is_upcoming':
|
||||
self.raise_no_formats(
|
||||
f'Could not find the playlist URL. This live event will begin at {release_timestamp_str} JST', expected=True)
|
||||
else:
|
||||
self.raise_no_formats(
|
||||
'Could not find the playlist URL. This event may not be accessible', expected=True)
|
||||
elif live_status == 'is_upcoming':
|
||||
self.raise_no_formats(f'This live event will begin at {release_timestamp_str} JST', expected=True)
|
||||
elif live_status == 'post_live':
|
||||
self.raise_no_formats('This event has ended, and the archive will be available shortly', expected=True)
|
||||
else:
|
||||
for m3u8_playlist_url in m3u8_playlist_urls:
|
||||
formats.extend(self._extract_m3u8_formats(m3u8_playlist_url, video_id))
|
||||
# FIXME: HTTP request headers need to be updated to continue download
|
||||
warning = 'Due to technical limitations, the download will be interrupted after one hour'
|
||||
if live_status == 'is_live':
|
||||
self.report_warning(warning)
|
||||
elif live_status == 'was_live':
|
||||
self.report_warning(f'{warning}. You can restart to continue the download')
|
||||
|
||||
return {
|
||||
'id': data_json['app_id'],
|
||||
'title': data_json.get('app_name'),
|
||||
'formats': formats,
|
||||
'live_status': live_status,
|
||||
'description': data_json.get('content'),
|
||||
'release_timestamp': release_timestamp,
|
||||
}
|
||||
@@ -52,7 +52,7 @@ class EpornerIE(InfoExtractor):
|
||||
|
||||
webpage, urlh = self._download_webpage_handle(url, display_id)
|
||||
|
||||
video_id = self._match_id(urlh.geturl())
|
||||
video_id = self._match_id(urlh.url)
|
||||
|
||||
hash = self._search_regex(
|
||||
r'hash\s*[:=]\s*["\']([\da-f]{32})', webpage, 'hash')
|
||||
|
||||
63
yt_dlp/extractor/erocast.py
Normal file
63
yt_dlp/extractor/erocast.py
Normal file
@@ -0,0 +1,63 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class ErocastIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?erocast\.me/track/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://erocast.me/track/9787/f',
|
||||
'md5': 'af63b91f5f231096aba54dd682abea3b',
|
||||
'info_dict': {
|
||||
'id': '9787',
|
||||
'title': '[F4M] Your roommate, who is definitely not possessed by an alien, suddenly wants to fuck you',
|
||||
'url': 'https://erocast.s3.us-east-2.wasabisys.com/1220419/track.m3u8',
|
||||
'ext': 'm4a',
|
||||
'age_limit': 18,
|
||||
'release_timestamp': 1696178652,
|
||||
'release_date': '20231001',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
'description': 'ExtraTerrestrial Tuesday!',
|
||||
'uploader': 'clarissaisshy',
|
||||
'uploader_id': '8113',
|
||||
'uploader_url': 'https://erocast.me/clarissaisshy',
|
||||
'thumbnail': 'https://erocast.s3.us-east-2.wasabisys.com/1220418/conversions/1696179247-lg.jpg',
|
||||
'duration': 2307,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'webpage_url': 'https://erocast.me/track/9787/f4m-your-roommate-who-is-definitely-not-possessed-by-an-alien-suddenly-wants-to-fuck-you',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
data = self._search_json(
|
||||
rf'<script>\s*var song_data_{video_id}\s*=', webpage, 'data', video_id, end_pattern=r'</script>')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': self._extract_m3u8_formats(
|
||||
data.get('file_url') or data['stream_url'], video_id, 'm4a', m3u8_id='hls'),
|
||||
'age_limit': 18,
|
||||
**traverse_obj(data, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'release_timestamp': ('created_at', {parse_iso8601}),
|
||||
'modified_timestamp': ('updated_at', {parse_iso8601}),
|
||||
'uploader': ('user', 'name', {str}),
|
||||
'uploader_id': ('user', 'id', {str_or_none}),
|
||||
'uploader_url': ('user', 'permalink_url', {url_or_none}),
|
||||
'thumbnail': ('artwork_url', {url_or_none}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'view_count': ('plays', {int_or_none}),
|
||||
'comment_count': ('comment_count', {int_or_none}),
|
||||
'webpage_url': ('permalink_url', {url_or_none}),
|
||||
}),
|
||||
}
|
||||
@@ -41,7 +41,7 @@ class EttuTvIE(InfoExtractor):
|
||||
'device': 'desktop',
|
||||
})
|
||||
|
||||
stream_response = self._download_json(player_settings['streamAccess'], video_id, data={})
|
||||
stream_response = self._download_json(player_settings['streamAccess'], video_id, data=b'')
|
||||
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
stream_response['data']['stream'], video_id, 'mp4')
|
||||
|
||||
@@ -11,8 +11,8 @@ class ExpressenIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?(?:expressen|di)\.se/
|
||||
(?:(?:tvspelare/video|videoplayer/embed)/)?
|
||||
tv/(?:[^/]+/)*
|
||||
(?:(?:tvspelare/video|video-?player/embed)/)?
|
||||
(?:tv|nyheter)/(?:[^/?#]+/)*
|
||||
(?P<id>[^/?#&]+)
|
||||
'''
|
||||
_EMBED_REGEX = [r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1']
|
||||
@@ -42,6 +42,12 @@ class ExpressenIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.di.se/videoplayer/embed/tv/ditv/borsmorgon/implantica-rusar-70--under-borspremiaren-hor-styrelsemedlemmen/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.expressen.se/video-player/embed/tv/nyheter/ekero-fodda-olof-gustafsson-forvaltar-knarkbaronen-pablo-escobars-namn',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.expressen.se/nyheter/efter-egna-telefonbluffen-escobar-stammer-klarna/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -8,6 +8,8 @@ from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..networking import Request
|
||||
from ..networking.exceptions import network_exceptions
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
@@ -19,11 +21,10 @@ from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
merge_dicts,
|
||||
network_exceptions,
|
||||
parse_count,
|
||||
parse_qs,
|
||||
qualities,
|
||||
sanitized_Request,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
url_or_none,
|
||||
@@ -73,6 +74,22 @@ class FacebookIE(InfoExtractor):
|
||||
_VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.facebook.com/radiokicksfm/videos/3676516585958356/',
|
||||
'info_dict': {
|
||||
'id': '3676516585958356',
|
||||
'ext': 'mp4',
|
||||
'title': 'dr Adam Przygoda',
|
||||
'description': 'md5:34675bda53336b1d16400265c2bb9b3b',
|
||||
'uploader': 'RADIO KICKS FM',
|
||||
'upload_date': '20230818',
|
||||
'timestamp': 1692346159,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'uploader_id': '100063551323670',
|
||||
'duration': 3132.184,
|
||||
'view_count': int,
|
||||
'concurrent_view_count': 0,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
|
||||
'md5': '6a40d33c0eccbb1af76cf0485a052659',
|
||||
'info_dict': {
|
||||
@@ -90,16 +107,16 @@ class FacebookIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '274175099429670',
|
||||
'ext': 'mp4',
|
||||
'title': 'Asif Nawab Butt',
|
||||
'description': 'Asif Nawab Butt',
|
||||
'title': 'Asif',
|
||||
'description': '',
|
||||
'uploader': 'Asif Nawab Butt',
|
||||
'upload_date': '20140506',
|
||||
'timestamp': 1399398998,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'uploader_id': 'pfbid028wxorhX2ErLFJ578N6P3crHD3PHmXTCqCvfBpsnbSLmbokwSY75p5hWBjHGkG4zxl',
|
||||
'duration': 131.03,
|
||||
'concurrent_view_count': int,
|
||||
},
|
||||
'expected_warnings': [
|
||||
'title'
|
||||
]
|
||||
}, {
|
||||
'note': 'Video with DASH manifest',
|
||||
'url': 'https://www.facebook.com/video.php?v=957955867617029',
|
||||
@@ -151,7 +168,7 @@ class FacebookIE(InfoExtractor):
|
||||
# have 1080P, but only up to 720p in swf params
|
||||
# data.video.story.attachments[].media
|
||||
'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
|
||||
'md5': '3f3798adb2b73423263e59376f1f5eb7',
|
||||
'md5': 'ca63897a90c9452efee5f8c40d080e25',
|
||||
'info_dict': {
|
||||
'id': '10155529876156509',
|
||||
'ext': 'mp4',
|
||||
@@ -162,6 +179,9 @@ class FacebookIE(InfoExtractor):
|
||||
'uploader': 'CNN',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'view_count': int,
|
||||
'uploader_id': '100059479812265',
|
||||
'concurrent_view_count': int,
|
||||
'duration': 44.478,
|
||||
},
|
||||
}, {
|
||||
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
|
||||
@@ -170,12 +190,16 @@ class FacebookIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '1417995061575415',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ukrainian Scientists Worldwide | Довгоочікуване відео',
|
||||
'title': 'Довгоочікуване відео | By Yaroslav - Facebook',
|
||||
'description': 'Довгоочікуване відео',
|
||||
'timestamp': 1486648771,
|
||||
'timestamp': 1486648217,
|
||||
'upload_date': '20170209',
|
||||
'uploader': 'Yaroslav Korpan',
|
||||
'uploader_id': '100000948048708',
|
||||
'uploader_id': 'pfbid06AScABAWcW91qpiuGrLt99Ef9tvwHoXP6t8KeFYEqkSfreMtfa9nTveh8b2ZEVSWl',
|
||||
'concurrent_view_count': int,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'view_count': int,
|
||||
'duration': 11736.446,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -192,9 +216,7 @@ class FacebookIE(InfoExtractor):
|
||||
'uploader': 'La Guía Del Varón',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Requires logging in',
|
||||
}, {
|
||||
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
|
||||
'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/',
|
||||
@@ -208,9 +230,7 @@ class FacebookIE(InfoExtractor):
|
||||
'uploader': 'Elisabeth Ahtn',
|
||||
'uploader_id': '100013949973717',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Requires logging in',
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
||||
'only_matching': True,
|
||||
@@ -252,7 +272,11 @@ class FacebookIE(InfoExtractor):
|
||||
'timestamp': 1527084179,
|
||||
'upload_date': '20180523',
|
||||
'uploader': 'ESL One Dota 2',
|
||||
'uploader_id': '234218833769558',
|
||||
'uploader_id': '100066514874195',
|
||||
'duration': 4524.212,
|
||||
'view_count': int,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'concurrent_view_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -262,8 +286,17 @@ class FacebookIE(InfoExtractor):
|
||||
'url': 'https://www.facebook.com/100033620354545/videos/106560053808006/',
|
||||
'info_dict': {
|
||||
'id': '106560053808006',
|
||||
'ext': 'mp4',
|
||||
'title': 'Josef',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'concurrent_view_count': int,
|
||||
'uploader_id': 'pfbid0cibUN6tV7DYgdbJdsUFN46wc4jKpVSPAvJQhFofGqBGmVn3V3JtAs2tfUwziw2hUl',
|
||||
'timestamp': 1549275572,
|
||||
'duration': 3.413,
|
||||
'uploader': 'Josef Novak',
|
||||
'description': '',
|
||||
'upload_date': '20190204',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
# data.video.story.attachments[].media
|
||||
'url': 'https://www.facebook.com/watch/?v=647537299265662',
|
||||
@@ -276,6 +309,7 @@ class FacebookIE(InfoExtractor):
|
||||
'id': '10157667649866271',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
'skip': 'Requires logging in',
|
||||
}, {
|
||||
# data.nodes[].comet_sections.content.story.attachments[].style_type_renderer.attachment.media
|
||||
'url': 'https://m.facebook.com/Alliance.Police.Department/posts/4048563708499330',
|
||||
@@ -319,7 +353,7 @@ class FacebookIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
login_page_req = sanitized_Request(self._LOGIN_URL)
|
||||
login_page_req = Request(self._LOGIN_URL)
|
||||
self._set_cookie('facebook.com', 'locale', 'en_US')
|
||||
login_page = self._download_webpage(login_page_req, None,
|
||||
note='Downloading login page',
|
||||
@@ -340,8 +374,8 @@ class FacebookIE(InfoExtractor):
|
||||
'timezone': '-60',
|
||||
'trynum': '1',
|
||||
}
|
||||
request = sanitized_Request(self._LOGIN_URL, urlencode_postdata(login_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
request = Request(self._LOGIN_URL, urlencode_postdata(login_form))
|
||||
request.headers['Content-Type'] = 'application/x-www-form-urlencoded'
|
||||
try:
|
||||
login_results = self._download_webpage(request, None,
|
||||
note='Logging in', errnote='unable to fetch login page')
|
||||
@@ -367,8 +401,8 @@ class FacebookIE(InfoExtractor):
|
||||
'h': h,
|
||||
'name_action_selected': 'dont_save',
|
||||
}
|
||||
check_req = sanitized_Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
|
||||
check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
check_req = Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
|
||||
check_req.headers['Content-Type'] = 'application/x-www-form-urlencoded'
|
||||
check_response = self._download_webpage(check_req, None,
|
||||
note='Confirming login')
|
||||
if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
|
||||
@@ -383,9 +417,9 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
def extract_metadata(webpage):
|
||||
post_data = [self._parse_json(j, video_id, fatal=False) for j in re.findall(
|
||||
r'handleWithCustomApplyEach\(\s*ScheduledApplyEach\s*,\s*(\{.+?\})\s*\);', webpage)]
|
||||
r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage)]
|
||||
post = traverse_obj(post_data, (
|
||||
..., 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
|
||||
..., 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
|
||||
media = traverse_obj(post, (..., 'attachments', ..., lambda k, v: (
|
||||
k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict)
|
||||
title = get_first(media, ('title', 'text'))
|
||||
@@ -463,25 +497,25 @@ class FacebookIE(InfoExtractor):
|
||||
dash_manifest = video.get('dash_manifest')
|
||||
if dash_manifest:
|
||||
formats.extend(self._parse_mpd_formats(
|
||||
compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest))))
|
||||
compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
|
||||
mpd_url=video.get('dash_manifest_url')))
|
||||
|
||||
def process_formats(info):
|
||||
# Downloads with browser's User-Agent are rate limited. Working around
|
||||
# with non-browser User-Agent.
|
||||
for f in info['formats']:
|
||||
f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'
|
||||
info['_format_sort_fields'] = ('res', 'quality')
|
||||
|
||||
def extract_relay_data(_filter):
|
||||
return self._parse_json(self._search_regex(
|
||||
r'handleWithCustomApplyEach\([^,]+,\s*({.*?%s.*?})\);' % _filter,
|
||||
r'data-sjs>({.*?%s.*?})</script>' % _filter,
|
||||
webpage, 'replay data', default='{}'), video_id, fatal=False) or {}
|
||||
|
||||
def extract_relay_prefetched_data(_filter):
|
||||
replay_data = extract_relay_data(_filter)
|
||||
for require in (replay_data.get('require') or []):
|
||||
if require[0] == 'RelayPrefetchedStreamCache':
|
||||
return try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {}
|
||||
return traverse_obj(extract_relay_data(_filter), (
|
||||
'require', (None, (..., ..., ..., '__bbox', 'require')),
|
||||
lambda _, v: 'RelayPrefetchedStreamCache' in v, ..., ...,
|
||||
'__bbox', 'result', 'data', {dict}), get_all=False) or {}
|
||||
|
||||
if not video_data:
|
||||
server_js_data = self._parse_json(self._search_regex([
|
||||
@@ -492,15 +526,23 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
if not video_data:
|
||||
data = extract_relay_prefetched_data(
|
||||
r'"(?:dash_manifest|playable_url(?:_quality_hd)?)"\s*:\s*"[^"]+"')
|
||||
r'"(?:dash_manifest|playable_url(?:_quality_hd)?)')
|
||||
if data:
|
||||
entries = []
|
||||
|
||||
def parse_graphql_video(video):
|
||||
v_id = video.get('videoId') or video.get('id') or video_id
|
||||
reel_info = traverse_obj(
|
||||
video, ('creation_story', 'short_form_video_context', 'playback_video', {dict}))
|
||||
if reel_info:
|
||||
video = video['creation_story']
|
||||
video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner'))
|
||||
video.update(reel_info)
|
||||
formats = []
|
||||
q = qualities(['sd', 'hd'])
|
||||
for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'),
|
||||
('playable_url_dash', '')):
|
||||
('playable_url_dash', ''), ('browser_native_hd_url', 'hd'),
|
||||
('browser_native_sd_url', 'sd')):
|
||||
playable_url = video.get(key)
|
||||
if not playable_url:
|
||||
continue
|
||||
@@ -509,19 +551,20 @@ class FacebookIE(InfoExtractor):
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'quality': q(format_id),
|
||||
# sd, hd formats w/o resolution info should be deprioritized below DASH
|
||||
'quality': q(format_id) - 3,
|
||||
'url': playable_url,
|
||||
})
|
||||
extract_dash_manifest(video, formats)
|
||||
v_id = video.get('videoId') or video.get('id') or video_id
|
||||
info = {
|
||||
'id': v_id,
|
||||
'formats': formats,
|
||||
'thumbnail': traverse_obj(
|
||||
video, ('thumbnailImage', 'uri'), ('preferred_thumbnail', 'image', 'uri')),
|
||||
'uploader_id': try_get(video, lambda x: x['owner']['id']),
|
||||
'timestamp': int_or_none(video.get('publish_time')),
|
||||
'duration': float_or_none(video.get('playable_duration_in_ms'), 1000),
|
||||
'uploader_id': traverse_obj(video, ('owner', 'id', {str_or_none})),
|
||||
'timestamp': traverse_obj(video, 'publish_time', 'creation_time', expected_type=int_or_none),
|
||||
'duration': (float_or_none(video.get('playable_duration_in_ms'), 1000)
|
||||
or float_or_none(video.get('length_in_second'))),
|
||||
}
|
||||
process_formats(info)
|
||||
description = try_get(video, lambda x: x['savable_description']['text'])
|
||||
@@ -676,9 +719,11 @@ class FacebookIE(InfoExtractor):
|
||||
for src_type in ('src', 'src_no_ratelimit'):
|
||||
src = f[0].get('%s_%s' % (quality, src_type))
|
||||
if src:
|
||||
preference = -10 if format_id == 'progressive' else -1
|
||||
# sd, hd formats w/o resolution info should be deprioritized below DASH
|
||||
# TODO: investigate if progressive or src formats still exist
|
||||
preference = -10 if format_id == 'progressive' else -3
|
||||
if quality == 'hd':
|
||||
preference += 5
|
||||
preference += 1
|
||||
formats.append({
|
||||
'format_id': '%s_%s_%s' % (format_id, quality, src_type),
|
||||
'url': src,
|
||||
@@ -782,18 +827,18 @@ class FacebookReelIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.facebook.com/reel/1195289147628387',
|
||||
'md5': 'c4ff9a7182ff9ff7d6f7a83603bae831',
|
||||
'md5': 'f13dd37f2633595982db5ed8765474d3',
|
||||
'info_dict': {
|
||||
'id': '1195289147628387',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:9f5b142921b2dc57004fa13f76005f87',
|
||||
'description': 'md5:24ea7ef062215d295bdde64e778f5474',
|
||||
'uploader': 'Beast Camp Training',
|
||||
'uploader_id': '1738535909799870',
|
||||
'duration': 9.536,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'title': 'md5:b05800b5b1ad56c0ca78bd3807b6a61e',
|
||||
'description': 'md5:22f03309b216ac84720183961441d8db',
|
||||
'uploader': 'md5:723e6cb3091241160f20b3c5dc282af1',
|
||||
'uploader_id': '100040874179269',
|
||||
'duration': 9.579,
|
||||
'timestamp': 1637502609,
|
||||
'upload_date': '20211121',
|
||||
'timestamp': 1637502604,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
}
|
||||
}]
|
||||
|
||||
|
||||
@@ -3,11 +3,11 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_parse_qs
|
||||
from ..dependencies import websockets
|
||||
from ..networking import Request
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
WebSocketsWrapper,
|
||||
js_to_json,
|
||||
sanitized_Request,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
@@ -57,7 +57,7 @@ class FC2IE(InfoExtractor):
|
||||
}
|
||||
|
||||
login_data = urlencode_postdata(login_form_strs)
|
||||
request = sanitized_Request(
|
||||
request = Request(
|
||||
'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data)
|
||||
|
||||
login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in')
|
||||
@@ -66,7 +66,7 @@ class FC2IE(InfoExtractor):
|
||||
return False
|
||||
|
||||
# this is also needed
|
||||
login_redir = sanitized_Request('http://id.fc2.com/?mode=redirect&login=done')
|
||||
login_redir = Request('http://id.fc2.com/?mode=redirect&login=done')
|
||||
self._download_webpage(
|
||||
login_redir, None, note='Login redirect', errnote='Login redirect failed')
|
||||
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_HTTPError,
|
||||
)
|
||||
from ..compat import compat_str
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
qualities,
|
||||
strip_or_none,
|
||||
@@ -40,8 +38,8 @@ class FilmOnIE(InfoExtractor):
|
||||
'https://www.filmon.com/api/vod/movie?id=%s' % video_id,
|
||||
video_id)['response']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError):
|
||||
errmsg = self._parse_json(e.cause.read().decode(), video_id)['reason']
|
||||
if isinstance(e.cause, HTTPError):
|
||||
errmsg = self._parse_json(e.cause.response.read().decode(), video_id)['reason']
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True)
|
||||
raise
|
||||
|
||||
@@ -124,8 +122,8 @@ class FilmOnChannelIE(InfoExtractor):
|
||||
channel_data = self._download_json(
|
||||
'http://www.filmon.com/api-v2/channel/' + channel_id, channel_id)['data']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError):
|
||||
errmsg = self._parse_json(e.cause.read().decode(), channel_id)['message']
|
||||
if isinstance(e.cause, HTTPError):
|
||||
errmsg = self._parse_json(e.cause.response.read().decode(), channel_id)['message']
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True)
|
||||
raise
|
||||
|
||||
|
||||
@@ -3,10 +3,10 @@ import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
@@ -20,7 +20,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class FOXIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[\da-fA-F]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?fox(?:sports)?\.com/(?:watch|replay)/(?P<id>[\da-fA-F]+)'
|
||||
_TESTS = [{
|
||||
# clip
|
||||
'url': 'https://www.fox.com/watch/4b765a60490325103ea69888fb2bd4e8/',
|
||||
@@ -50,6 +50,10 @@ class FOXIE(InfoExtractor):
|
||||
# sports event, geo-restricted
|
||||
'url': 'https://www.fox.com/watch/b057484dade738d1f373b3e46216fa2c/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# fox sports replay, geo-restricted
|
||||
'url': 'https://www.foxsports.com/replay/561f3e071347a24e5e877abc56b22e89',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_BYPASS = False
|
||||
_HOME_PAGE_URL = 'https://www.fox.com/'
|
||||
@@ -68,9 +72,9 @@ class FOXIE(InfoExtractor):
|
||||
'https://api3.fox.com/v2.0/' + path,
|
||||
video_id, data=data, headers=headers)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
|
||||
entitlement_issues = self._parse_json(
|
||||
e.cause.read().decode(), video_id)['entitlementIssues']
|
||||
e.cause.response.read().decode(), video_id)['entitlementIssues']
|
||||
for e in entitlement_issues:
|
||||
if e.get('errorCode') == 1005:
|
||||
raise ExtractorError(
|
||||
@@ -123,8 +127,8 @@ class FOXIE(InfoExtractor):
|
||||
try:
|
||||
m3u8_url = self._download_json(release_url, video_id)['playURL']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
error = self._parse_json(e.cause.read().decode(), video_id)
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
|
||||
error = self._parse_json(e.cause.response.read().decode(), video_id)
|
||||
if error.get('exception') == 'GeoLocationBlocked':
|
||||
self.raise_geo_restricted(countries=['US'])
|
||||
raise ExtractorError(error['description'], expected=True)
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from .common import InfoExtractor
|
||||
from .uplynk import UplynkPreplayIE
|
||||
from ..utils import HEADRequest, float_or_none, make_archive_id, smuggle_url
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import float_or_none, make_archive_id, smuggle_url
|
||||
|
||||
|
||||
class FoxSportsIE(InfoExtractor):
|
||||
@@ -35,7 +36,7 @@ class FoxSportsIE(InfoExtractor):
|
||||
'x-api-key': 'cf289e299efdfa39fb6316f259d1de93',
|
||||
})
|
||||
preplay_url = self._request_webpage(
|
||||
HEADRequest(data['url']), video_id, 'Fetching preplay URL').geturl()
|
||||
HEADRequest(data['url']), video_id, 'Fetching preplay URL').url
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from ..utils import HEADRequest
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
|
||||
|
||||
class FujiTVFODPlus7IE(InfoExtractor):
|
||||
|
||||
@@ -3,7 +3,7 @@ import re
|
||||
import string
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
@@ -46,8 +46,8 @@ class FunimationBaseIE(InfoExtractor):
|
||||
}))
|
||||
FunimationBaseIE._TOKEN = data['token']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||
error = self._parse_json(e.cause.read().decode(), None)['error']
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
|
||||
error = self._parse_json(e.cause.response.read().decode(), None)['error']
|
||||
raise ExtractorError(error, expected=True)
|
||||
raise
|
||||
|
||||
|
||||
@@ -60,6 +60,7 @@ class Funker530IE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
info = {}
|
||||
rumble_url = list(RumbleEmbedIE._extract_embed_urls(url, webpage))
|
||||
if rumble_url:
|
||||
info = {'url': rumble_url[0], 'ie_key': RumbleEmbedIE.ie_key()}
|
||||
|
||||
@@ -2,13 +2,8 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .kaltura import KalturaIE
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
remove_start,
|
||||
sanitized_Request,
|
||||
smuggle_url,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..networking import HEADRequest, Request
|
||||
from ..utils import remove_start, smuggle_url, urlencode_postdata
|
||||
|
||||
|
||||
class GDCVaultIE(InfoExtractor):
|
||||
@@ -138,8 +133,8 @@ class GDCVaultIE(InfoExtractor):
|
||||
'password': password,
|
||||
}
|
||||
|
||||
request = sanitized_Request(login_url, urlencode_postdata(login_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
request = Request(login_url, urlencode_postdata(login_form))
|
||||
request.headers['Content-Type'] = 'application/x-www-form-urlencoded'
|
||||
self._download_webpage(request, display_id, 'Logging in')
|
||||
start_page = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page')
|
||||
self._download_webpage(logout_url, display_id, 'Logging out')
|
||||
@@ -163,7 +158,7 @@ class GDCVaultIE(InfoExtractor):
|
||||
video_url = 'http://www.gdcvault.com' + direct_url
|
||||
# resolve the url so that we can detect the correct extension
|
||||
video_url = self._request_webpage(
|
||||
HEADRequest(video_url), video_id).geturl()
|
||||
HEADRequest(video_url), video_id).url
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -58,6 +58,8 @@ class GenericIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'trailer',
|
||||
'upload_date': '20100513',
|
||||
'direct': True,
|
||||
'timestamp': 1273772943.0,
|
||||
}
|
||||
},
|
||||
# Direct link to media delivered compressed (until Accept-Encoding is *)
|
||||
@@ -101,6 +103,8 @@ class GenericIE(InfoExtractor):
|
||||
'ext': 'webm',
|
||||
'title': '5_Lennart_Poettering_-_Systemd',
|
||||
'upload_date': '20141120',
|
||||
'direct': True,
|
||||
'timestamp': 1416498816.0,
|
||||
},
|
||||
'expected_warnings': [
|
||||
'URL could be a direct video link, returning it as such.'
|
||||
@@ -133,6 +137,7 @@ class GenericIE(InfoExtractor):
|
||||
'upload_date': '20201204',
|
||||
},
|
||||
}],
|
||||
'skip': 'Dead link',
|
||||
},
|
||||
# RSS feed with item with description and thumbnails
|
||||
{
|
||||
@@ -145,12 +150,12 @@ class GenericIE(InfoExtractor):
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'ext': 'm4a',
|
||||
'id': 'c1c879525ce2cb640b344507e682c36d',
|
||||
'id': '818a5d38-01cd-152f-2231-ee479677fa82',
|
||||
'title': 're:Hydrogen!',
|
||||
'description': 're:.*In this episode we are going.*',
|
||||
'timestamp': 1567977776,
|
||||
'upload_date': '20190908',
|
||||
'duration': 459,
|
||||
'duration': 423,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'episode_number': 1,
|
||||
'season_number': 1,
|
||||
@@ -267,6 +272,7 @@ class GenericIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
},
|
||||
# MPD from http://dash-mse-test.appspot.com/media.html
|
||||
{
|
||||
@@ -278,6 +284,7 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'car-20120827-manifest',
|
||||
'formats': 'mincount:9',
|
||||
'upload_date': '20130904',
|
||||
'timestamp': 1378272859.0,
|
||||
},
|
||||
},
|
||||
# m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
|
||||
@@ -318,7 +325,7 @@ class GenericIE(InfoExtractor):
|
||||
'id': 'cmQHVoWB5FY',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20130224',
|
||||
'uploader_id': 'TheVerge',
|
||||
'uploader_id': '@TheVerge',
|
||||
'description': r're:^Chris Ziegler takes a look at the\.*',
|
||||
'uploader': 'The Verge',
|
||||
'title': 'First Firefox OS phones side-by-side',
|
||||
@@ -2370,7 +2377,7 @@ class GenericIE(InfoExtractor):
|
||||
'id': flashvars['video_id'],
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'thumbnail': urljoin(url, thumbnail),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -2431,7 +2438,7 @@ class GenericIE(InfoExtractor):
|
||||
'Accept-Encoding': 'identity',
|
||||
**smuggled_data.get('http_headers', {})
|
||||
})
|
||||
new_url = full_response.geturl()
|
||||
new_url = full_response.url
|
||||
url = urllib.parse.urlparse(url)._replace(scheme=urllib.parse.urlparse(new_url).scheme).geturl()
|
||||
if new_url != extract_basic_auth(url)[0]:
|
||||
self.report_following_redirect(new_url)
|
||||
@@ -2529,12 +2536,12 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_result(
|
||||
self._parse_xspf(
|
||||
doc, video_id, xspf_url=url,
|
||||
xspf_base_url=full_response.geturl()),
|
||||
xspf_base_url=full_response.url),
|
||||
video_id)
|
||||
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
|
||||
info_dict['formats'], info_dict['subtitles'] = self._parse_mpd_formats_and_subtitles(
|
||||
doc,
|
||||
mpd_base_url=full_response.geturl().rpartition('/')[0],
|
||||
mpd_base_url=full_response.url.rpartition('/')[0],
|
||||
mpd_url=url)
|
||||
self._extra_manifest_info(info_dict, url)
|
||||
self.report_detected('DASH manifest')
|
||||
@@ -2562,7 +2569,7 @@ class GenericIE(InfoExtractor):
|
||||
self._downloader.write_debug('Looking for embeds')
|
||||
embeds = list(self._extract_embeds(original_url, webpage, urlh=full_response, info_dict=info_dict))
|
||||
if len(embeds) == 1:
|
||||
return {**info_dict, **embeds[0]}
|
||||
return merge_dicts(embeds[0], info_dict)
|
||||
elif embeds:
|
||||
return self.playlist_result(embeds, **info_dict)
|
||||
raise UnsupportedError(url)
|
||||
@@ -2572,7 +2579,7 @@ class GenericIE(InfoExtractor):
|
||||
info_dict = types.MappingProxyType(info_dict) # Prevents accidental mutation
|
||||
video_id = traverse_obj(info_dict, 'display_id', 'id') or self._generic_id(url)
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
actual_url = urlh.geturl() if urlh else url
|
||||
actual_url = urlh.url if urlh else url
|
||||
|
||||
# Sometimes embedded video player is hidden behind percent encoding
|
||||
# (e.g. https://github.com/ytdl-org/youtube-dl/issues/2448)
|
||||
|
||||
@@ -8,8 +8,8 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
)
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
orderedSet,
|
||||
|
||||
@@ -60,13 +60,13 @@ class GofileIE(InfoExtractor):
|
||||
account_data = self._download_json(
|
||||
'https://api.gofile.io/createAccount', None, note='Getting a new guest account')
|
||||
self._TOKEN = account_data['data']['token']
|
||||
self._set_cookie('gofile.io', 'accountToken', self._TOKEN)
|
||||
self._set_cookie('.gofile.io', 'accountToken', self._TOKEN)
|
||||
|
||||
def _entries(self, file_id):
|
||||
query_params = {
|
||||
'contentId': file_id,
|
||||
'token': self._TOKEN,
|
||||
'websiteToken': 12345,
|
||||
'websiteToken': '7fd94ds12fds4', # From https://gofile.io/dist/js/alljs.js
|
||||
}
|
||||
password = self.get_param('videopassword')
|
||||
if password:
|
||||
|
||||
@@ -5,7 +5,9 @@ from ..compat import compat_parse_qs
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
get_element_by_class,
|
||||
get_element_html_by_id,
|
||||
int_or_none,
|
||||
lowercase_escape,
|
||||
try_get,
|
||||
@@ -34,6 +36,7 @@ class GoogleDriveIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Big Buck Bunny.mp4',
|
||||
'duration': 45,
|
||||
'thumbnail': 'https://drive.google.com/thumbnail?id=0ByeS4oOUV-49Zzh4R1J6R09zazQ',
|
||||
}
|
||||
}, {
|
||||
# video can't be watched anonymously due to view count limit reached,
|
||||
@@ -207,10 +210,10 @@ class GoogleDriveIE(InfoExtractor):
|
||||
'export': 'download',
|
||||
})
|
||||
|
||||
def request_source_file(source_url, kind):
|
||||
def request_source_file(source_url, kind, data=None):
|
||||
return self._request_webpage(
|
||||
source_url, video_id, note='Requesting %s file' % kind,
|
||||
errnote='Unable to request %s file' % kind, fatal=False)
|
||||
errnote='Unable to request %s file' % kind, fatal=False, data=data)
|
||||
urlh = request_source_file(source_url, 'source')
|
||||
if urlh:
|
||||
def add_source_format(urlh):
|
||||
@@ -225,7 +228,7 @@ class GoogleDriveIE(InfoExtractor):
|
||||
# Using original URLs may result in redirect loop due to
|
||||
# google.com's cookies mistakenly used for googleusercontent.com
|
||||
# redirect URLs (see #23919).
|
||||
'url': urlh.geturl(),
|
||||
'url': urlh.url,
|
||||
'ext': determine_ext(title, 'mp4').lower(),
|
||||
'format_id': 'source',
|
||||
'quality': 1,
|
||||
@@ -237,14 +240,10 @@ class GoogleDriveIE(InfoExtractor):
|
||||
urlh, url, video_id, note='Downloading confirmation page',
|
||||
errnote='Unable to confirm download', fatal=False)
|
||||
if confirmation_webpage:
|
||||
confirm = self._search_regex(
|
||||
r'confirm=([^&"\']+)', confirmation_webpage,
|
||||
'confirmation code', default=None)
|
||||
if confirm:
|
||||
confirmed_source_url = update_url_query(source_url, {
|
||||
'confirm': confirm,
|
||||
})
|
||||
urlh = request_source_file(confirmed_source_url, 'confirmed source')
|
||||
confirmed_source_url = extract_attributes(
|
||||
get_element_html_by_id('download-form', confirmation_webpage) or '').get('action')
|
||||
if confirmed_source_url:
|
||||
urlh = request_source_file(confirmed_source_url, 'confirmed source', data=b'')
|
||||
if urlh and urlh.headers.get('Content-Disposition'):
|
||||
add_source_format(urlh)
|
||||
else:
|
||||
|
||||
@@ -383,9 +383,9 @@ class AwsIdp:
|
||||
months = [None, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
|
||||
days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
|
||||
|
||||
time_now = datetime.datetime.utcnow()
|
||||
time_now = datetime.datetime.now(datetime.timezone.utc)
|
||||
format_string = "{} {} {} %H:%M:%S UTC %Y".format(days[time_now.weekday()], months[time_now.month], time_now.day)
|
||||
time_string = datetime.datetime.utcnow().strftime(format_string)
|
||||
time_string = time_now.strftime(format_string)
|
||||
return time_string
|
||||
|
||||
def __str__(self):
|
||||
|
||||
@@ -126,7 +126,7 @@ class HKETVIE(InfoExtractor):
|
||||
# If we ever wanted to provide the final resolved URL that
|
||||
# does not require cookies, albeit with a shorter lifespan:
|
||||
# urlh = self._downloader.urlopen(file_url)
|
||||
# resolved_url = urlh.geturl()
|
||||
# resolved_url = urlh.url
|
||||
label = fmt.get('label')
|
||||
h = self._FORMAT_HEIGHTS.get(label)
|
||||
w = h * width // height if h and width and height else None
|
||||
|
||||
@@ -1,11 +1,7 @@
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_b64decode
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..networking import HEADRequest, Request
|
||||
from ..utils import ExtractorError, urlencode_postdata
|
||||
|
||||
|
||||
class HotNewHipHopIE(InfoExtractor):
|
||||
@@ -36,9 +32,9 @@ class HotNewHipHopIE(InfoExtractor):
|
||||
('mediaType', 's'),
|
||||
('mediaId', video_id),
|
||||
])
|
||||
r = sanitized_Request(
|
||||
r = Request(
|
||||
'http://www.hotnewhiphop.com/ajax/media/getActions/', data=reqdata)
|
||||
r.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
r.headers['Content-Type'] = 'application/x-www-form-urlencoded'
|
||||
mkd = self._download_json(
|
||||
r, video_id, note='Requesting media key',
|
||||
errnote='Could not download media key')
|
||||
@@ -50,7 +46,7 @@ class HotNewHipHopIE(InfoExtractor):
|
||||
req = self._request_webpage(
|
||||
redirect_req, video_id,
|
||||
note='Resolving final URL', errnote='Could not resolve final URL')
|
||||
video_url = req.geturl()
|
||||
video_url = req.url
|
||||
if video_url.endswith('.html'):
|
||||
raise ExtractorError('Redirect failed')
|
||||
|
||||
|
||||
@@ -6,7 +6,8 @@ import time
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError, compat_str
|
||||
from ..compat import compat_str
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
@@ -83,7 +84,7 @@ class HotStarIE(HotStarBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?hotstar\.com(?:/in)?/(?!in/)
|
||||
(?:
|
||||
(?P<type>movies|sports|episode|(?P<tv>tv|shows))/
|
||||
(?P<type>movies|sports|clips|episode|(?P<tv>tv|shows))/
|
||||
(?(tv)(?:[^/?#]+/){2}|[^?#]*)
|
||||
)?
|
||||
[^/?#]+/
|
||||
@@ -141,6 +142,51 @@ class HotStarIE(HotStarBaseIE):
|
||||
'duration': 1272,
|
||||
'channel_id': 3,
|
||||
},
|
||||
'skip': 'HTTP Error 504: Gateway Time-out', # XXX: Investigate 504 errors on some episodes
|
||||
}, {
|
||||
'url': 'https://www.hotstar.com/in/shows/kana-kaanum-kaalangal/1260097087/back-to-school/1260097320',
|
||||
'info_dict': {
|
||||
'id': '1260097320',
|
||||
'ext': 'mp4',
|
||||
'title': 'Back To School',
|
||||
'season': 'Chapter 1',
|
||||
'description': 'md5:b0d6a4c8a650681491e7405496fc7e13',
|
||||
'timestamp': 1650564000,
|
||||
'channel': 'Hotstar Specials',
|
||||
'series': 'Kana Kaanum Kaalangal',
|
||||
'season_number': 1,
|
||||
'season_id': 9441,
|
||||
'upload_date': '20220421',
|
||||
'episode': 'Back To School',
|
||||
'episode_number': 1,
|
||||
'duration': 1810,
|
||||
'channel_id': 54,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.hotstar.com/in/clips/e3-sairat-kahani-pyaar-ki/1000262286',
|
||||
'info_dict': {
|
||||
'id': '1000262286',
|
||||
'ext': 'mp4',
|
||||
'title': 'E3 - SaiRat, Kahani Pyaar Ki',
|
||||
'description': 'md5:e3b4b3203bc0c5396fe7d0e4948a6385',
|
||||
'episode': 'E3 - SaiRat, Kahani Pyaar Ki',
|
||||
'upload_date': '20210606',
|
||||
'timestamp': 1622943900,
|
||||
'duration': 5395,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.hotstar.com/in/movies/premam/1000091195',
|
||||
'info_dict': {
|
||||
'id': '1000091195',
|
||||
'ext': 'mp4',
|
||||
'title': 'Premam',
|
||||
'release_year': 2015,
|
||||
'description': 'md5:d833c654e4187b5e34757eafb5b72d7f',
|
||||
'timestamp': 1462149000,
|
||||
'upload_date': '20160502',
|
||||
'episode': 'Premam',
|
||||
'duration': 8994,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157',
|
||||
'only_matching': True,
|
||||
@@ -159,6 +205,7 @@ class HotStarIE(HotStarBaseIE):
|
||||
'episode': 'episode',
|
||||
'tv': 'episode',
|
||||
'shows': 'episode',
|
||||
'clips': 'content',
|
||||
None: 'content',
|
||||
}
|
||||
|
||||
@@ -186,8 +233,10 @@ class HotStarIE(HotStarBaseIE):
|
||||
video_type = self._TYPE.get(video_type, video_type)
|
||||
cookies = self._get_cookies(url) # Cookies before any request
|
||||
|
||||
video_data = self._call_api_v1(f'{video_type}/detail', video_id,
|
||||
query={'tas': 10000, 'contentId': video_id})['body']['results']['item']
|
||||
video_data = traverse_obj(
|
||||
self._call_api_v1(
|
||||
f'{video_type}/detail', video_id, fatal=False, query={'tas': 10000, 'contentId': video_id}),
|
||||
('body', 'results', 'item', {dict})) or {}
|
||||
if not self.get_param('allow_unplayable_formats') and video_data.get('drmProtected'):
|
||||
self.report_drm(video_id)
|
||||
|
||||
@@ -233,7 +282,7 @@ class HotStarIE(HotStarBaseIE):
|
||||
'height': int_or_none(playback_set.get('height')),
|
||||
}]
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
|
||||
geo_restricted = True
|
||||
continue
|
||||
|
||||
@@ -272,6 +321,7 @@ class HotStarIE(HotStarBaseIE):
|
||||
'description': video_data.get('description'),
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'timestamp': int_or_none(traverse_obj(video_data, 'broadcastDate', 'startDate')),
|
||||
'release_year': int_or_none(video_data.get('year')),
|
||||
'formats': formats,
|
||||
'subtitles': subs,
|
||||
'channel': video_data.get('channelName'),
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..networking import Request
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
sanitized_Request,
|
||||
try_get,
|
||||
)
|
||||
|
||||
@@ -42,7 +42,7 @@ class HRTiBaseIE(InfoExtractor):
|
||||
'application_version': self._APP_VERSION
|
||||
}
|
||||
|
||||
req = sanitized_Request(self._API_URL, data=json.dumps(app_data).encode('utf-8'))
|
||||
req = Request(self._API_URL, data=json.dumps(app_data).encode('utf-8'))
|
||||
req.get_method = lambda: 'PUT'
|
||||
|
||||
resources = self._download_json(
|
||||
@@ -73,8 +73,8 @@ class HRTiBaseIE(InfoExtractor):
|
||||
self._login_url, None, note='Logging in', errnote='Unable to log in',
|
||||
data=json.dumps(auth_data).encode('utf-8'))
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 406:
|
||||
auth_info = self._parse_json(e.cause.read().encode('utf-8'), None)
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 406:
|
||||
auth_info = self._parse_json(e.cause.response.read().encode('utf-8'), None)
|
||||
else:
|
||||
raise
|
||||
|
||||
|
||||
@@ -1,19 +1,32 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
remove_end,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class HungamaIE(InfoExtractor):
|
||||
class HungamaBaseIE(InfoExtractor):
|
||||
def _call_api(self, path, content_id, fatal=False):
|
||||
return traverse_obj(self._download_json(
|
||||
f'https://cpage.api.hungama.com/v2/page/content/{content_id}/{path}/detail',
|
||||
content_id, fatal=fatal, query={
|
||||
'device': 'web',
|
||||
'platform': 'a',
|
||||
'storeId': '1',
|
||||
}), ('data', {dict})) or {}
|
||||
|
||||
|
||||
class HungamaIE(HungamaBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?hungama\.com/
|
||||
(?:www\.|un\.)?hungama\.com/
|
||||
(?:
|
||||
(?:video|movie)/[^/]+/|
|
||||
(?:video|movie|short-film)/[^/]+/|
|
||||
tv-show/(?:[^/]+/){2}\d+/episode/[^/]+/
|
||||
)
|
||||
(?P<id>\d+)
|
||||
@@ -25,13 +38,28 @@ class HungamaIE(InfoExtractor):
|
||||
'id': '39349649',
|
||||
'ext': 'mp4',
|
||||
'title': 'Krishna Chants',
|
||||
'description': 'Watch Krishna Chants video now. You can also watch other latest videos only at Hungama',
|
||||
'description': ' ',
|
||||
'upload_date': '20180829',
|
||||
'duration': 264,
|
||||
'timestamp': 1535500800,
|
||||
'view_count': int,
|
||||
'thumbnail': 'https://images.hungama.com/c/1/0dc/2ca/39349649/39349649_700x394.jpg',
|
||||
}
|
||||
'thumbnail': 'https://images1.hungama.com/tr:n-a_169_m/c/1/0dc/2ca/39349649/39349649_350x197.jpg?v=8',
|
||||
'tags': 'count:6',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://un.hungama.com/short-film/adira/102524179/',
|
||||
'md5': '2278463f5dc9db9054d0c02602d44666',
|
||||
'info_dict': {
|
||||
'id': '102524179',
|
||||
'ext': 'mp4',
|
||||
'title': 'Adira',
|
||||
'description': 'md5:df20cd4d41eabb33634f06de1025a4b4',
|
||||
'upload_date': '20230417',
|
||||
'timestamp': 1681689600,
|
||||
'view_count': int,
|
||||
'thumbnail': 'https://images1.hungama.com/tr:n-a_23_m/c/1/197/ac9/102524179/102524179_350x525.jpg?v=1',
|
||||
'tags': 'count:7',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.hungama.com/movie/kahaani-2/44129919/',
|
||||
'only_matching': True,
|
||||
@@ -51,14 +79,19 @@ class HungamaIE(InfoExtractor):
|
||||
'c': 'common',
|
||||
'm': 'get_video_mdn_url',
|
||||
})
|
||||
|
||||
formats = self._extract_m3u8_formats(video_json['stream_url'], video_id, ext='mp4', m3u8_id='hls')
|
||||
|
||||
json_ld = self._search_json_ld(
|
||||
self._download_webpage(url, video_id, fatal=False) or '', video_id, fatal=False)
|
||||
metadata = self._call_api('movie', video_id)
|
||||
|
||||
return {
|
||||
**json_ld,
|
||||
**traverse_obj(metadata, ('head', 'data', {
|
||||
'title': ('title', {str}),
|
||||
'description': ('misc', 'description', {str}),
|
||||
'duration': ('duration', {int}), # duration in JSON is incorrect if string
|
||||
'timestamp': ('releasedate', {unified_timestamp}),
|
||||
'view_count': ('misc', 'playcount', {int_or_none}),
|
||||
'thumbnail': ('image', {url_or_none}),
|
||||
'tags': ('misc', 'keywords', ..., {str}),
|
||||
})),
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': {
|
||||
@@ -71,10 +104,10 @@ class HungamaIE(InfoExtractor):
|
||||
|
||||
|
||||
class HungamaSongIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?hungama\.com/song/[^/]+/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.|un\.)?hungama\.com/song/[^/]+/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.hungama.com/song/kitni-haseen-zindagi/2931166/',
|
||||
'md5': 'd4a6a05a394ad0453a9bea3ca00e6024',
|
||||
'md5': '964f46828e8b250aa35e5fdcfdcac367',
|
||||
'info_dict': {
|
||||
'id': '2931166',
|
||||
'ext': 'mp3',
|
||||
@@ -83,8 +116,22 @@ class HungamaSongIE(InfoExtractor):
|
||||
'artist': 'Lucky Ali',
|
||||
'album': None,
|
||||
'release_year': 2000,
|
||||
}
|
||||
}
|
||||
'thumbnail': 'https://stat2.hungama.ind.in/assets/images/default_images/da-200x200.png',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://un.hungama.com/song/tum-kya-mile-from-rocky-aur-rani-kii-prem-kahaani/103553672',
|
||||
'md5': '964f46828e8b250aa35e5fdcfdcac367',
|
||||
'info_dict': {
|
||||
'id': '103553672',
|
||||
'ext': 'mp3',
|
||||
'title': 'md5:5ebeb1e10771b634ce5f700ce68ae5f4',
|
||||
'track': 'Tum Kya Mile (From "Rocky Aur Rani Kii Prem Kahaani")',
|
||||
'artist': 'Pritam Chakraborty, Arijit Singh, Shreya Ghoshal, Amitabh Bhattacharya',
|
||||
'album': 'Tum Kya Mile (From "Rocky Aur Rani Kii Prem Kahaani")',
|
||||
'release_year': 2023,
|
||||
'thumbnail': 'https://images.hungama.com/c/1/7c2/c7b/103553671/103553671_200x200.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
audio_id = self._match_id(url)
|
||||
@@ -122,8 +169,8 @@ class HungamaSongIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class HungamaAlbumPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?hungama\.com/(?:playlists|album)/[^/]+/(?P<id>\d+)'
|
||||
class HungamaAlbumPlaylistIE(HungamaBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.|un\.)?hungama\.com/(?P<path>playlists|album)/[^/]+/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.hungama.com/album/bhuj-the-pride-of-india/69481490/',
|
||||
'playlist_mincount': 7,
|
||||
@@ -132,16 +179,24 @@ class HungamaAlbumPlaylistIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.hungama.com/playlists/hindi-jan-to-june-2021/123063/',
|
||||
'playlist_mincount': 50,
|
||||
'playlist_mincount': 33,
|
||||
'info_dict': {
|
||||
'id': '123063',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://un.hungama.com/album/what-jhumka-%3F-from-rocky-aur-rani-kii-prem-kahaani/103891805/',
|
||||
'playlist_mincount': 1,
|
||||
'info_dict': {
|
||||
'id': '103891805',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
ptrn = r'<meta[^>]+?property=[\"\']?music:song:url[\"\']?[^>]+?content=[\"\']?([^\"\']+)'
|
||||
items = re.findall(ptrn, webpage)
|
||||
entries = [self.url_result(item, ie=HungamaSongIE.ie_key()) for item in items]
|
||||
return self.playlist_result(entries, video_id)
|
||||
playlist_id, path = self._match_valid_url(url).group('id', 'path')
|
||||
data = self._call_api(remove_end(path, 's'), playlist_id, fatal=True)
|
||||
|
||||
def entries():
|
||||
for song_url in traverse_obj(data, ('body', 'rows', ..., 'data', 'misc', 'share', {url_or_none})):
|
||||
yield self.url_result(song_url, HungamaSongIE)
|
||||
|
||||
return self.playlist_result(entries(), playlist_id)
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
import re
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_parse_qs
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
@@ -27,9 +28,9 @@ class IGNBaseIE(InfoExtractor):
|
||||
try:
|
||||
return self._call_api(slug)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 404:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 404:
|
||||
e.cause.args = e.cause.args or [
|
||||
e.cause.geturl(), e.cause.getcode(), e.cause.reason]
|
||||
e.cause.response.url, e.cause.status, e.cause.reason]
|
||||
raise ExtractorError(
|
||||
'Content not found: expired?', cause=e.cause,
|
||||
expected=True)
|
||||
@@ -196,10 +197,6 @@ class IGNVideoIE(IGNBaseIE):
|
||||
'thumbnail': 'https://sm.ign.com/ign_me/video/h/how-hitman/how-hitman-aims-to-be-different-than-every-other-s_8z14.jpg',
|
||||
'duration': 298,
|
||||
'tags': 'count:13',
|
||||
'display_id': '112203',
|
||||
'thumbnail': 'https://sm.ign.com/ign_me/video/h/how-hitman/how-hitman-aims-to-be-different-than-every-other-s_8z14.jpg',
|
||||
'duration': 298,
|
||||
'tags': 'count:13',
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 400: Bad Request'],
|
||||
}, {
|
||||
@@ -226,7 +223,7 @@ class IGNVideoIE(IGNBaseIE):
|
||||
parsed_url._replace(path=parsed_url.path.rsplit('/', 1)[0] + '/embed'))
|
||||
|
||||
webpage, urlh = self._download_webpage_handle(embed_url, video_id)
|
||||
new_url = urlh.geturl()
|
||||
new_url = urlh.url
|
||||
ign_url = compat_parse_qs(
|
||||
urllib.parse.urlparse(new_url).query).get('url', [None])[-1]
|
||||
if ign_url:
|
||||
@@ -323,14 +320,14 @@ class IGNArticleIE(IGNBaseIE):
|
||||
try:
|
||||
return self._call_api(slug)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, urllib.error.HTTPError):
|
||||
if isinstance(e.cause, HTTPError):
|
||||
e.cause.args = e.cause.args or [
|
||||
e.cause.geturl(), e.cause.getcode(), e.cause.reason]
|
||||
if e.cause.code == 404:
|
||||
e.cause.response.url, e.cause.status, e.cause.reason]
|
||||
if e.cause.status == 404:
|
||||
raise ExtractorError(
|
||||
'Content not found: expired?', cause=e.cause,
|
||||
expected=True)
|
||||
elif e.cause.code == 503:
|
||||
elif e.cause.status == 503:
|
||||
self.report_warning(error_to_compat_str(e.cause))
|
||||
return
|
||||
raise
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
@@ -52,9 +52,9 @@ class ImgGamingBaseIE(InfoExtractor):
|
||||
return self._call_api(
|
||||
stream_path, media_id)['playerUrlCallback']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
|
||||
raise ExtractorError(
|
||||
self._parse_json(e.cause.read().decode(), media_id)['messages'][0],
|
||||
self._parse_json(e.cause.response.read().decode(), media_id)['messages'][0],
|
||||
expected=True)
|
||||
raise
|
||||
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_iso8601,
|
||||
time_seconds,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
@@ -11,15 +11,14 @@ from ..utils import (
|
||||
class IndavideoEmbedIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:embed\.)?indavideo\.hu/player/video/|assets\.indavideo\.hu/swf/player\.swf\?.*\b(?:v(?:ID|id))=)(?P<id>[\da-f]+)'
|
||||
# Some example URLs covered by generic extractor:
|
||||
# http://indavideo.hu/video/Vicces_cica_1
|
||||
# http://index.indavideo.hu/video/2015_0728_beregszasz
|
||||
# http://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko
|
||||
# http://erotika.indavideo.hu/video/Amator_tini_punci
|
||||
# http://film.indavideo.hu/video/f_hrom_nagymamm_volt
|
||||
# http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes
|
||||
_EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//embed\.indavideo\.hu/player/video/[\da-f]+)']
|
||||
# https://indavideo.hu/video/Vicces_cica_1
|
||||
# https://index.indavideo.hu/video/Hod_Nemetorszagban
|
||||
# https://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko
|
||||
# https://film.indavideo.hu/video/f_farkaslesen
|
||||
# https://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes
|
||||
_EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)//embed\.indavideo\.hu/player/video/[\da-f]+)']
|
||||
_TESTS = [{
|
||||
'url': 'http://indavideo.hu/player/video/1bdc3c6d80/',
|
||||
'url': 'https://indavideo.hu/player/video/1bdc3c6d80/',
|
||||
'md5': 'c8a507a1c7410685f83a06eaeeaafeab',
|
||||
'info_dict': {
|
||||
'id': '1837039',
|
||||
@@ -36,21 +35,33 @@ class IndavideoEmbedIE(InfoExtractor):
|
||||
'tags': ['tánc', 'cica', 'cuki', 'cukiajanlo', 'newsroom'],
|
||||
},
|
||||
}, {
|
||||
'url': 'http://embed.indavideo.hu/player/video/1bdc3c6d80?autostart=1&hide=1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://assets.indavideo.hu/swf/player.swf?v=fe25e500&vID=1bdc3c6d80&autostart=1&hide=1&i=1',
|
||||
'url': 'https://embed.indavideo.hu/player/video/1bdc3c6d80?autostart=1&hide=1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://indavideo.hu/video/Vicces_cica_1',
|
||||
'info_dict': {
|
||||
'id': '1335611',
|
||||
'ext': 'mp4',
|
||||
'title': 'Vicces cica',
|
||||
'description': 'Játszik a tablettel. :D',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Jet_Pack',
|
||||
'uploader_id': '491217',
|
||||
'timestamp': 1390821212,
|
||||
'upload_date': '20140127',
|
||||
'duration': 7,
|
||||
'age_limit': 0,
|
||||
'tags': ['cica', 'Jet_Pack'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video = self._download_json(
|
||||
'https://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/%s' % video_id,
|
||||
video_id)['data']
|
||||
|
||||
title = video['title']
|
||||
f'https://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/{video_id}/',
|
||||
video_id, query={'_': time_seconds()})['data']
|
||||
|
||||
video_urls = []
|
||||
|
||||
@@ -60,33 +71,21 @@ class IndavideoEmbedIE(InfoExtractor):
|
||||
elif isinstance(video_files, dict):
|
||||
video_urls.extend(video_files.values())
|
||||
|
||||
video_file = video.get('video_file')
|
||||
if video:
|
||||
video_urls.append(video_file)
|
||||
video_urls = list(set(video_urls))
|
||||
|
||||
video_prefix = video_urls[0].rsplit('/', 1)[0]
|
||||
|
||||
for flv_file in video.get('flv_files', []):
|
||||
flv_url = '%s/%s' % (video_prefix, flv_file)
|
||||
if flv_url not in video_urls:
|
||||
video_urls.append(flv_url)
|
||||
|
||||
filesh = video.get('filesh')
|
||||
filesh = video.get('filesh') or {}
|
||||
|
||||
formats = []
|
||||
for video_url in video_urls:
|
||||
height = int_or_none(self._search_regex(
|
||||
r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', default=None))
|
||||
if filesh:
|
||||
if not height:
|
||||
continue
|
||||
token = filesh.get(compat_str(height))
|
||||
if token is None:
|
||||
continue
|
||||
video_url = update_url_query(video_url, {'token': token})
|
||||
if not height and len(filesh) == 1:
|
||||
height = int_or_none(list(filesh.keys())[0])
|
||||
token = filesh.get(str(height))
|
||||
if token is None:
|
||||
continue
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'url': update_url_query(video_url, {'token': token}),
|
||||
'height': height,
|
||||
})
|
||||
|
||||
@@ -103,7 +102,7 @@ class IndavideoEmbedIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video.get('id') or video_id,
|
||||
'title': title,
|
||||
'title': video.get('title'),
|
||||
'description': video.get('description'),
|
||||
'thumbnails': thumbnails,
|
||||
'uploader': video.get('user_name'),
|
||||
|
||||
@@ -3,9 +3,9 @@ import itertools
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import urllib.error
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
decode_base_n,
|
||||
@@ -442,7 +442,7 @@ class InstagramIE(InstagramBaseIE):
|
||||
shared_data = self._search_json(
|
||||
r'window\._sharedData\s*=', webpage, 'shared data', video_id, fatal=False) or {}
|
||||
|
||||
if shared_data and self._LOGIN_URL not in urlh.geturl():
|
||||
if shared_data and self._LOGIN_URL not in urlh.url:
|
||||
media.update(traverse_obj(
|
||||
shared_data, ('entry_data', 'PostPage', 0, 'graphql', 'shortcode_media'),
|
||||
('entry_data', 'PostPage', 0, 'media'), expected_type=dict) or {})
|
||||
@@ -589,7 +589,7 @@ class InstagramPlaylistBaseIE(InstagramBaseIE):
|
||||
except ExtractorError as e:
|
||||
# if it's an error caused by a bad query, and there are
|
||||
# more GIS templates to try, ignore it and keep trying
|
||||
if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 403:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
|
||||
if gis_tmpl != gis_tmpls[-1]:
|
||||
continue
|
||||
raise
|
||||
|
||||
@@ -81,7 +81,7 @@ class IPrimaIE(InfoExtractor):
|
||||
note='Logging in')
|
||||
|
||||
# a profile may need to be selected first, even when there is only a single one
|
||||
if '/profile-select' in login_handle.geturl():
|
||||
if '/profile-select' in login_handle.url:
|
||||
profile_id = self._search_regex(
|
||||
r'data-identifier\s*=\s*["\']?(\w+)', profile_select_html, 'profile id')
|
||||
|
||||
@@ -89,7 +89,7 @@ class IPrimaIE(InfoExtractor):
|
||||
f'{self._AUTH_ROOT}/user/profile-select-perform/{profile_id}', None,
|
||||
query={'continueUrl': '/user/login?redirect_uri=/user/'}, note='Selecting profile')
|
||||
|
||||
code = traverse_obj(login_handle.geturl(), ({parse_qs}, 'code', 0))
|
||||
code = traverse_obj(login_handle.url, ({parse_qs}, 'code', 0))
|
||||
if not code:
|
||||
raise ExtractorError('Login failed', expected=True)
|
||||
|
||||
@@ -134,10 +134,17 @@ class IPrimaIE(InfoExtractor):
|
||||
), webpage, 'real id', group='id', default=None)
|
||||
|
||||
if not video_id:
|
||||
nuxt_data = self._search_nuxt_data(webpage, video_id, traverse='data')
|
||||
nuxt_data = self._search_nuxt_data(webpage, video_id, traverse='data', fatal=False)
|
||||
video_id = traverse_obj(
|
||||
nuxt_data, (..., 'content', 'additionals', 'videoPlayId', {str}), get_all=False)
|
||||
|
||||
if not video_id:
|
||||
nuxt_data = self._search_json(
|
||||
r'<script[^>]+\bid=["\']__NUXT_DATA__["\'][^>]*>',
|
||||
webpage, 'nuxt data', None, end_pattern=r'</script>', contains_pattern=r'\[(?s:.+)\]')
|
||||
|
||||
video_id = traverse_obj(nuxt_data, lambda _, v: re.fullmatch(r'p\d+', v), get_all=False)
|
||||
|
||||
if not video_id:
|
||||
self.raise_no_formats('Unable to extract video ID from webpage')
|
||||
|
||||
|
||||
@@ -499,9 +499,10 @@ class IqIE(InfoExtractor):
|
||||
'tm': tm,
|
||||
'qdy': 'a',
|
||||
'qds': 0,
|
||||
'k_ft1': 141287244169348,
|
||||
'k_ft4': 34359746564,
|
||||
'k_ft5': 1,
|
||||
'k_ft1': '143486267424900',
|
||||
'k_ft4': '1572868',
|
||||
'k_ft7': '4',
|
||||
'k_ft5': '1',
|
||||
'bop': JSON.stringify({
|
||||
'version': '10.0',
|
||||
'dfp': dfp
|
||||
@@ -527,16 +528,24 @@ class IqIE(InfoExtractor):
|
||||
if player_js_cache:
|
||||
return player_js_cache
|
||||
webpack_js_url = self._proto_relative_url(self._search_regex(
|
||||
r'<script src="((?:https?)?//stc.iqiyipic.com/_next/static/chunks/webpack-\w+\.js)"', webpage, 'webpack URL'))
|
||||
r'<script src="((?:https?:)?//stc\.iqiyipic\.com/_next/static/chunks/webpack-\w+\.js)"', webpage, 'webpack URL'))
|
||||
webpack_js = self._download_webpage(webpack_js_url, video_id, note='Downloading webpack JS', errnote='Unable to download webpack JS')
|
||||
|
||||
webpack_map = self._search_json(
|
||||
r'["\']\s*\+\s*', webpack_js, 'JS locations', video_id,
|
||||
contains_pattern=r'{\s*(?:\d+\s*:\s*["\'][\da-f]+["\']\s*,?\s*)+}',
|
||||
end_pattern=r'\[\w+\]\+["\']\.js', transform_source=js_to_json)
|
||||
|
||||
replacement_map = self._search_json(
|
||||
r'["\']\s*\+\(\s*', webpack_js, 'replacement map', video_id,
|
||||
contains_pattern=r'{\s*(?:\d+\s*:\s*["\'][\w.-]+["\']\s*,?\s*)+}',
|
||||
end_pattern=r'\[\w+\]\|\|\w+\)\+["\']\.', transform_source=js_to_json,
|
||||
fatal=False) or {}
|
||||
|
||||
for module_index in reversed(webpack_map):
|
||||
real_module = replacement_map.get(module_index) or module_index
|
||||
module_js = self._download_webpage(
|
||||
f'https://stc.iqiyipic.com/_next/static/chunks/{module_index}.{webpack_map[module_index]}.js',
|
||||
f'https://stc.iqiyipic.com/_next/static/chunks/{real_module}.{webpack_map[module_index]}.js',
|
||||
video_id, note=f'Downloading #{module_index} module JS', errnote='Unable to download module JS', fatal=False) or ''
|
||||
if 'vms request' in module_js:
|
||||
self.cache.store('iq', 'player_js', module_js)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
@@ -101,8 +101,8 @@ class KakaoIE(InfoExtractor):
|
||||
cdn_api_base, video_id, query=query,
|
||||
note='Downloading video URL for profile %s' % profile_name)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
resp = self._parse_json(e.cause.read().decode(), video_id)
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
|
||||
resp = self._parse_json(e.cause.response.read().decode(), video_id)
|
||||
if resp.get('code') == 'GeoBlocked':
|
||||
self.raise_geo_restricted()
|
||||
raise
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
UserNotLive,
|
||||
float_or_none,
|
||||
merge_dicts,
|
||||
@@ -30,7 +29,7 @@ class KickBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class KickIE(KickBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?kick\.com/(?!(?:video|categories|search|auth)(?:[/?#]|$))(?P<id>[\w_]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?kick\.com/(?!(?:video|categories|search|auth)(?:[/?#]|$))(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://kick.com/yuppy',
|
||||
'info_dict': {
|
||||
|
||||
@@ -91,7 +91,7 @@ class KuwoIE(KuwoBaseIE):
|
||||
webpage, urlh = self._download_webpage_handle(
|
||||
url, song_id, note='Download song detail info',
|
||||
errnote='Unable to get song detail info')
|
||||
if song_id not in urlh.geturl() or '对不起,该歌曲由于版权问题已被下线,将返回网站首页' in webpage:
|
||||
if song_id not in urlh.url or '对不起,该歌曲由于版权问题已被下线,将返回网站首页' in webpage:
|
||||
raise ExtractorError('this song has been offline because of copyright issues', expected=True)
|
||||
|
||||
song_name = self._html_search_regex(
|
||||
|
||||
@@ -1,13 +1,8 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
HEADRequest,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
unified_strdate,
|
||||
)
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import float_or_none, int_or_none, parse_duration, unified_strdate
|
||||
|
||||
|
||||
class LA7IE(InfoExtractor):
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
import functools
|
||||
import json
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
OnDemandPagedList,
|
||||
UnsupportedError,
|
||||
determine_ext,
|
||||
@@ -21,10 +22,11 @@ from ..utils import (
|
||||
|
||||
|
||||
class LBRYBaseIE(InfoExtractor):
|
||||
_BASE_URL_REGEX = r'(?:https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/|lbry://)'
|
||||
_BASE_URL_REGEX = r'(?x)(?:https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/|lbry://)'
|
||||
_CLAIM_ID_REGEX = r'[0-9a-f]{1,40}'
|
||||
_OPT_CLAIM_ID = '[^:/?#&]+(?:[:#]%s)?' % _CLAIM_ID_REGEX
|
||||
_OPT_CLAIM_ID = '[^$@:/?#&]+(?:[:#]%s)?' % _CLAIM_ID_REGEX
|
||||
_SUPPORTED_STREAM_TYPES = ['video', 'audio']
|
||||
_PAGE_SIZE = 50
|
||||
|
||||
def _call_api_proxy(self, method, display_id, params, resource):
|
||||
headers = {'Content-Type': 'application/json-rpc'}
|
||||
@@ -68,22 +70,82 @@ class LBRYBaseIE(InfoExtractor):
|
||||
'duration': ('value', stream_type, 'duration', {int_or_none}),
|
||||
'channel': ('signing_channel', 'value', 'title', {str}),
|
||||
'channel_id': ('signing_channel', 'claim_id', {str}),
|
||||
'uploader_id': ('signing_channel', 'name', {str}),
|
||||
})
|
||||
|
||||
channel_name = traverse_obj(stream, ('signing_channel', 'name', {str}))
|
||||
if channel_name and info.get('channel_id'):
|
||||
info['channel_url'] = self._permanent_url(url, channel_name, info['channel_id'])
|
||||
if info.get('uploader_id') and info.get('channel_id'):
|
||||
info['channel_url'] = self._permanent_url(url, info['uploader_id'], info['channel_id'])
|
||||
|
||||
return info
|
||||
|
||||
def _fetch_page(self, display_id, url, params, page):
|
||||
page += 1
|
||||
page_params = {
|
||||
'no_totals': True,
|
||||
'page': page,
|
||||
'page_size': self._PAGE_SIZE,
|
||||
**params,
|
||||
}
|
||||
result = self._call_api_proxy(
|
||||
'claim_search', display_id, page_params, f'page {page}')
|
||||
for item in traverse_obj(result, ('items', lambda _, v: v['name'] and v['claim_id'])):
|
||||
yield {
|
||||
**self._parse_stream(item, url),
|
||||
'_type': 'url',
|
||||
'id': item['claim_id'],
|
||||
'url': self._permanent_url(url, item['name'], item['claim_id']),
|
||||
}
|
||||
|
||||
def _playlist_entries(self, url, display_id, claim_param, metadata):
|
||||
qs = parse_qs(url)
|
||||
content = qs.get('content', [None])[0]
|
||||
params = {
|
||||
'fee_amount': qs.get('fee_amount', ['>=0'])[0],
|
||||
'order_by': {
|
||||
'new': ['release_time'],
|
||||
'top': ['effective_amount'],
|
||||
'trending': ['trending_group', 'trending_mixed'],
|
||||
}[qs.get('order', ['new'])[0]],
|
||||
'claim_type': 'stream',
|
||||
'stream_types': [content] if content in ['audio', 'video'] else self._SUPPORTED_STREAM_TYPES,
|
||||
**claim_param,
|
||||
}
|
||||
duration = qs.get('duration', [None])[0]
|
||||
if duration:
|
||||
params['duration'] = {
|
||||
'long': '>=1200',
|
||||
'short': '<=240',
|
||||
}[duration]
|
||||
language = qs.get('language', ['all'])[0]
|
||||
if language != 'all':
|
||||
languages = [language]
|
||||
if language == 'en':
|
||||
languages.append('none')
|
||||
params['any_languages'] = languages
|
||||
|
||||
entries = OnDemandPagedList(
|
||||
functools.partial(self._fetch_page, display_id, url, params),
|
||||
self._PAGE_SIZE)
|
||||
|
||||
return self.playlist_result(
|
||||
entries, display_id, **traverse_obj(metadata, ('value', {
|
||||
'title': 'title',
|
||||
'description': 'description',
|
||||
})))
|
||||
|
||||
|
||||
class LBRYIE(LBRYBaseIE):
|
||||
IE_NAME = 'lbry'
|
||||
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P<id>\$/[^/]+/[^/]+/{1}|@{0}/{0}|(?!@){0})'.format(LBRYBaseIE._OPT_CLAIM_ID, LBRYBaseIE._CLAIM_ID_REGEX)
|
||||
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + rf'''
|
||||
(?:\$/(?:download|embed)/)?
|
||||
(?P<id>
|
||||
[^$@:/?#]+/{LBRYBaseIE._CLAIM_ID_REGEX}
|
||||
|(?:@{LBRYBaseIE._OPT_CLAIM_ID}/)?{LBRYBaseIE._OPT_CLAIM_ID}
|
||||
)'''
|
||||
_TESTS = [{
|
||||
# Video
|
||||
'url': 'https://lbry.tv/@Mantega:1/First-day-LBRY:1',
|
||||
'md5': 'fffd15d76062e9a985c22c7c7f2f4805',
|
||||
'md5': '65bd7ec1f6744ada55da8e4c48a2edf9',
|
||||
'info_dict': {
|
||||
'id': '17f983b61f53091fb8ea58a9c56804e4ff8cff4d',
|
||||
'ext': 'mp4',
|
||||
@@ -97,6 +159,7 @@ class LBRYIE(LBRYBaseIE):
|
||||
'height': 720,
|
||||
'thumbnail': 'https://spee.ch/7/67f2d809c263288c.png',
|
||||
'license': 'None',
|
||||
'uploader_id': '@Mantega',
|
||||
'duration': 346,
|
||||
'channel': 'LBRY/Odysee rats united!!!',
|
||||
'channel_id': '1c8ad6a2ab4e889a71146ae4deeb23bb92dab627',
|
||||
@@ -130,11 +193,11 @@ class LBRYIE(LBRYBaseIE):
|
||||
'vcodec': 'none',
|
||||
'thumbnail': 'https://spee.ch/d/0bc63b0e6bf1492d.png',
|
||||
'license': 'None',
|
||||
'uploader_id': '@LBRYFoundation',
|
||||
}
|
||||
}, {
|
||||
# HLS
|
||||
'url': 'https://odysee.com/@gardeningincanada:b/plants-i-will-never-grow-again.-the:e',
|
||||
'md5': '25049011f3c8bc2f8b60ad88a031837e',
|
||||
'md5': 'c35fac796f62a14274b4dc2addb5d0ba',
|
||||
'info_dict': {
|
||||
'id': 'e51671357333fe22ae88aad320bde2f6f96b1410',
|
||||
'ext': 'mp4',
|
||||
@@ -149,6 +212,7 @@ class LBRYIE(LBRYBaseIE):
|
||||
'channel': 'Gardening In Canada',
|
||||
'channel_id': 'b8be0e93b423dad221abe29545fbe8ec36e806bc',
|
||||
'channel_url': 'https://odysee.com/@gardeningincanada:b8be0e93b423dad221abe29545fbe8ec36e806bc',
|
||||
'uploader_id': '@gardeningincanada',
|
||||
'formats': 'mincount:3',
|
||||
'thumbnail': 'https://thumbnails.lbry.com/AgHSc_HzrrE',
|
||||
'license': 'Copyrighted (contact publisher)',
|
||||
@@ -174,6 +238,7 @@ class LBRYIE(LBRYBaseIE):
|
||||
'formats': 'mincount:1',
|
||||
'thumbnail': 'startswith:https://thumb',
|
||||
'license': 'None',
|
||||
'uploader_id': '@RT',
|
||||
},
|
||||
'params': {'skip_download': True}
|
||||
}, {
|
||||
@@ -184,12 +249,13 @@ class LBRYIE(LBRYBaseIE):
|
||||
'id': '41fbfe805eb73c8d3012c0c49faa0f563274f634',
|
||||
'ext': 'mp4',
|
||||
'title': 'Biotechnological Invasion of Skin (April 2023)',
|
||||
'description': 'md5:709a2f4c07bd8891cda3a7cc2d6fcf5c',
|
||||
'description': 'md5:fe28689db2cb7ba3436d819ac3ffc378',
|
||||
'channel': 'Wicked Truths',
|
||||
'channel_id': '23d2bbf856b0ceed5b1d7c5960bcc72da5a20cb0',
|
||||
'channel_url': 'https://odysee.com/@wickedtruths:23d2bbf856b0ceed5b1d7c5960bcc72da5a20cb0',
|
||||
'timestamp': 1685790036,
|
||||
'upload_date': '20230603',
|
||||
'uploader_id': '@wickedtruths',
|
||||
'timestamp': 1695114347,
|
||||
'upload_date': '20230919',
|
||||
'release_timestamp': 1685617473,
|
||||
'release_date': '20230601',
|
||||
'duration': 1063,
|
||||
@@ -229,10 +295,10 @@ class LBRYIE(LBRYBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
if display_id.startswith('$/'):
|
||||
display_id = display_id.split('/', 2)[-1].replace('/', ':')
|
||||
else:
|
||||
if display_id.startswith('@'):
|
||||
display_id = display_id.replace(':', '#')
|
||||
else:
|
||||
display_id = display_id.replace('/', ':')
|
||||
display_id = urllib.parse.unquote(display_id)
|
||||
uri = 'lbry://' + display_id
|
||||
result = self._resolve_url(uri, display_id, 'stream')
|
||||
@@ -246,12 +312,13 @@ class LBRYIE(LBRYBaseIE):
|
||||
streaming_url = self._call_api_proxy(
|
||||
'get', claim_id, {'uri': uri}, 'streaming url')['streaming_url']
|
||||
|
||||
# GET request returns original video/audio file if available
|
||||
ext = urlhandle_detect_ext(self._request_webpage(
|
||||
streaming_url, display_id, 'Checking for original quality', headers=headers))
|
||||
if ext != 'm3u8':
|
||||
# GET request to v3 API returns original video/audio file if available
|
||||
direct_url = re.sub(r'/api/v\d+/', '/api/v3/', streaming_url)
|
||||
urlh = self._request_webpage(
|
||||
direct_url, display_id, 'Checking for original quality', headers=headers, fatal=False)
|
||||
if urlh and urlhandle_detect_ext(urlh) != 'm3u8':
|
||||
formats.append({
|
||||
'url': streaming_url,
|
||||
'url': direct_url,
|
||||
'format_id': 'original',
|
||||
'quality': 1,
|
||||
**traverse_obj(result, ('value', {
|
||||
@@ -266,7 +333,7 @@ class LBRYIE(LBRYBaseIE):
|
||||
# HEAD request returns redirect response to m3u8 URL if available
|
||||
final_url = self._request_webpage(
|
||||
HEADRequest(streaming_url), display_id, headers=headers,
|
||||
note='Downloading streaming redirect url info').geturl()
|
||||
note='Downloading streaming redirect url info').url
|
||||
|
||||
elif result.get('value_type') == 'stream':
|
||||
claim_id, is_live = result['signing_channel']['claim_id'], True
|
||||
@@ -298,7 +365,7 @@ class LBRYIE(LBRYBaseIE):
|
||||
|
||||
class LBRYChannelIE(LBRYBaseIE):
|
||||
IE_NAME = 'lbry:channel'
|
||||
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P<id>@%s)/?(?:[?&]|$)' % LBRYBaseIE._OPT_CLAIM_ID
|
||||
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + rf'(?P<id>@{LBRYBaseIE._OPT_CLAIM_ID})/?(?:[?&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://lbry.tv/@LBRYFoundation:0',
|
||||
'info_dict': {
|
||||
@@ -314,65 +381,50 @@ class LBRYChannelIE(LBRYBaseIE):
|
||||
'url': 'lbry://@lbry#3f',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_PAGE_SIZE = 50
|
||||
|
||||
def _fetch_page(self, claim_id, url, params, page):
|
||||
page += 1
|
||||
page_params = {
|
||||
'channel_ids': [claim_id],
|
||||
'claim_type': 'stream',
|
||||
'no_totals': True,
|
||||
'page': page,
|
||||
'page_size': self._PAGE_SIZE,
|
||||
}
|
||||
page_params.update(params)
|
||||
result = self._call_api_proxy(
|
||||
'claim_search', claim_id, page_params, 'page %d' % page)
|
||||
for item in (result.get('items') or []):
|
||||
stream_claim_name = item.get('name')
|
||||
stream_claim_id = item.get('claim_id')
|
||||
if not (stream_claim_name and stream_claim_id):
|
||||
continue
|
||||
|
||||
yield {
|
||||
**self._parse_stream(item, url),
|
||||
'_type': 'url',
|
||||
'id': stream_claim_id,
|
||||
'url': self._permanent_url(url, stream_claim_name, stream_claim_id),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url).replace(':', '#')
|
||||
result = self._resolve_url(
|
||||
'lbry://' + display_id, display_id, 'channel')
|
||||
result = self._resolve_url(f'lbry://{display_id}', display_id, 'channel')
|
||||
claim_id = result['claim_id']
|
||||
qs = parse_qs(url)
|
||||
content = qs.get('content', [None])[0]
|
||||
params = {
|
||||
'fee_amount': qs.get('fee_amount', ['>=0'])[0],
|
||||
'order_by': {
|
||||
'new': ['release_time'],
|
||||
'top': ['effective_amount'],
|
||||
'trending': ['trending_group', 'trending_mixed'],
|
||||
}[qs.get('order', ['new'])[0]],
|
||||
'stream_types': [content] if content in ['audio', 'video'] else self._SUPPORTED_STREAM_TYPES,
|
||||
}
|
||||
duration = qs.get('duration', [None])[0]
|
||||
if duration:
|
||||
params['duration'] = {
|
||||
'long': '>=1200',
|
||||
'short': '<=240',
|
||||
}[duration]
|
||||
language = qs.get('language', ['all'])[0]
|
||||
if language != 'all':
|
||||
languages = [language]
|
||||
if language == 'en':
|
||||
languages.append('none')
|
||||
params['any_languages'] = languages
|
||||
entries = OnDemandPagedList(
|
||||
functools.partial(self._fetch_page, claim_id, url, params),
|
||||
self._PAGE_SIZE)
|
||||
result_value = result.get('value') or {}
|
||||
return self.playlist_result(
|
||||
entries, claim_id, result_value.get('title'),
|
||||
result_value.get('description'))
|
||||
|
||||
return self._playlist_entries(url, claim_id, {'channel_ids': [claim_id]}, result)
|
||||
|
||||
|
||||
class LBRYPlaylistIE(LBRYBaseIE):
|
||||
IE_NAME = 'lbry:playlist'
|
||||
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'\$/(?:play)?list/(?P<id>[0-9a-f-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://odysee.com/$/playlist/ffef782f27486f0ac138bde8777f72ebdd0548c2',
|
||||
'info_dict': {
|
||||
'id': 'ffef782f27486f0ac138bde8777f72ebdd0548c2',
|
||||
'title': 'Théâtre Classique',
|
||||
'description': 'Théâtre Classique',
|
||||
},
|
||||
'playlist_mincount': 4,
|
||||
}, {
|
||||
'url': 'https://odysee.com/$/list/9c6658b3dd21e4f2a0602d523a13150e2b48b770',
|
||||
'info_dict': {
|
||||
'id': '9c6658b3dd21e4f2a0602d523a13150e2b48b770',
|
||||
'title': 'Social Media Exposed',
|
||||
'description': 'md5:98af97317aacd5b85d595775ea37d80e',
|
||||
},
|
||||
'playlist_mincount': 34,
|
||||
}, {
|
||||
'url': 'https://odysee.com/$/playlist/938fb11d-215f-4d1c-ad64-723954df2184',
|
||||
'info_dict': {
|
||||
'id': '938fb11d-215f-4d1c-ad64-723954df2184',
|
||||
},
|
||||
'playlist_mincount': 1000,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
result = traverse_obj(self._call_api_proxy('claim_search', display_id, {
|
||||
'claim_ids': [display_id],
|
||||
'no_totals': True,
|
||||
'page': 1,
|
||||
'page_size': self._PAGE_SIZE,
|
||||
}, 'playlist'), ('items', 0))
|
||||
claim_param = {'claim_ids': traverse_obj(result, ('value', 'claims', ..., {str}))}
|
||||
|
||||
return self._playlist_entries(url, display_id, claim_param, result)
|
||||
|
||||
@@ -25,7 +25,7 @@ class LecturioBaseIE(InfoExtractor):
|
||||
self._LOGIN_URL, None, 'Downloading login popup')
|
||||
|
||||
def is_logged(url_handle):
|
||||
return self._LOGIN_URL not in url_handle.geturl()
|
||||
return self._LOGIN_URL not in url_handle.url
|
||||
|
||||
# Already logged in
|
||||
if is_logged(urlh):
|
||||
@@ -57,8 +57,8 @@ class LecturioIE(LecturioBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https://
|
||||
(?:
|
||||
app\.lecturio\.com/([^/]+/(?P<nt>[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P<id>\d+))|
|
||||
(?:www\.)?lecturio\.de/[^/]+/(?P<nt_de>[^/?#&]+)\.vortrag
|
||||
app\.lecturio\.com/([^/?#]+/(?P<nt>[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P<id>\d+))|
|
||||
(?:www\.)?lecturio\.de/(?:[^/?#]+/)+(?P<nt_de>[^/?#&]+)\.vortrag
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
@@ -73,6 +73,9 @@ class LecturioIE(LecturioBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.lecturio.de/jura/oeffentliches-recht-at-1-staatsexamen/oeffentliches-recht-staatsexamen.vortrag',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://app.lecturio.com/#/lecture/c/6434/39634',
|
||||
'only_matching': True,
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
@@ -75,7 +75,7 @@ class LEGOIE(InfoExtractor):
|
||||
'videoId': '%s_%s' % (uuid.UUID(video_id), locale),
|
||||
}, headers=self.geo_verification_headers())
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 451:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 451:
|
||||
self.raise_geo_restricted(countries=countries)
|
||||
raise
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
@@ -69,8 +69,8 @@ class LimelightBaseIE(InfoExtractor):
|
||||
item_id, 'Downloading PlaylistService %s JSON' % method,
|
||||
fatal=fatal, headers=headers)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
error = self._parse_json(e.cause.read().decode(), item_id)['detail']['contentAccessPermission']
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
|
||||
error = self._parse_json(e.cause.response.read().decode(), item_id)['detail']['contentAccessPermission']
|
||||
if error == 'CountryDisabled':
|
||||
self.raise_geo_restricted()
|
||||
raise ExtractorError(error, expected=True)
|
||||
|
||||
@@ -2,11 +2,8 @@ import json
|
||||
import random
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
)
|
||||
from ..compat import compat_b64decode, compat_str
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
@@ -107,7 +104,7 @@ class LinuxAcademyIE(InfoExtractor):
|
||||
'sso': 'true',
|
||||
})
|
||||
|
||||
login_state_url = urlh.geturl()
|
||||
login_state_url = urlh.url
|
||||
|
||||
try:
|
||||
login_page = self._download_webpage(
|
||||
@@ -119,8 +116,8 @@ class LinuxAcademyIE(InfoExtractor):
|
||||
'Referer': login_state_url,
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||
error = self._parse_json(e.cause.read(), None)
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
|
||||
error = self._parse_json(e.cause.response.read(), None)
|
||||
message = error.get('description') or error['code']
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, message), expected=True)
|
||||
@@ -137,7 +134,7 @@ class LinuxAcademyIE(InfoExtractor):
|
||||
})
|
||||
|
||||
access_token = self._search_regex(
|
||||
r'access_token=([^=&]+)', urlh.geturl(),
|
||||
r'access_token=([^=&]+)', urlh.url,
|
||||
'access token', default=None)
|
||||
if not access_token:
|
||||
access_token = self._parse_json(
|
||||
|
||||
@@ -13,7 +13,7 @@ from ..utils import (
|
||||
class LiTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?litv\.tv/(?:vod|promo)/[^/]+/(?:content\.do)?\?.*?\b(?:content_)?id=(?P<id>[^&]+)'
|
||||
|
||||
_URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?id=%s'
|
||||
_URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?content_id=%s'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
|
||||
@@ -21,16 +21,18 @@ class LiTVIE(InfoExtractor):
|
||||
'id': 'VOD00041606',
|
||||
'title': '花千骨',
|
||||
},
|
||||
'playlist_count': 50,
|
||||
'playlist_count': 51, # 50 episodes + 1 trailer
|
||||
}, {
|
||||
'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
|
||||
'md5': '969e343d9244778cb29acec608e53640',
|
||||
'md5': 'b90ff1e9f1d8f5cfcd0a44c3e2b34c7a',
|
||||
'info_dict': {
|
||||
'id': 'VOD00041610',
|
||||
'ext': 'mp4',
|
||||
'title': '花千骨第1集',
|
||||
'thumbnail': r're:https?://.*\.jpg$',
|
||||
'description': 'md5:c7017aa144c87467c4fb2909c4b05d6f',
|
||||
'description': '《花千骨》陸劇線上看。十六年前,平靜的村莊內,一名女嬰隨異相出生,途徑此地的蜀山掌門清虛道長算出此女命運非同一般,她體內散發的異香易招惹妖魔。一念慈悲下,他在村莊周邊設下結界阻擋妖魔入侵,讓其年滿十六後去蜀山,並賜名花千骨。',
|
||||
'categories': ['奇幻', '愛情', '中國', '仙俠'],
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
},
|
||||
'params': {
|
||||
@@ -46,20 +48,17 @@ class LiTVIE(InfoExtractor):
|
||||
'title': '芈月傳第1集 霸星芈月降世楚國',
|
||||
'description': '楚威王二年,太史令唐昧夜觀星象,發現霸星即將現世。王后得知霸星的預言後,想盡辦法不讓孩子順利出生,幸得莒姬相護化解危機。沒想到眾人期待下出生的霸星卻是位公主,楚威王對此失望至極。楚王后命人將女嬰丟棄河中,居然奇蹟似的被少司命像攔下,楚威王認為此女非同凡響,為她取名芈月。',
|
||||
},
|
||||
'skip': 'Georestricted to Taiwan',
|
||||
'skip': 'No longer exists',
|
||||
}]
|
||||
|
||||
def _extract_playlist(self, season_list, video_id, program_info, prompt=True):
|
||||
episode_title = program_info['title']
|
||||
content_id = season_list['contentId']
|
||||
|
||||
def _extract_playlist(self, playlist_data, content_type):
|
||||
all_episodes = [
|
||||
self.url_result(smuggle_url(
|
||||
self._URL_TEMPLATE % (program_info['contentType'], episode['contentId']),
|
||||
self._URL_TEMPLATE % (content_type, episode['contentId']),
|
||||
{'force_noplaylist': True})) # To prevent infinite recursion
|
||||
for episode in season_list['episode']]
|
||||
for episode in traverse_obj(playlist_data, ('seasons', ..., 'episode', lambda _, v: v['contentId']))]
|
||||
|
||||
return self.playlist_result(all_episodes, content_id, episode_title)
|
||||
return self.playlist_result(all_episodes, playlist_data['contentId'], playlist_data.get('title'))
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
@@ -68,24 +67,31 @@ class LiTVIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if self._search_regex(
|
||||
r'(?i)<meta\s[^>]*http-equiv="refresh"\s[^>]*content="[0-9]+;\s*url=https://www\.litv\.tv/"',
|
||||
webpage, 'meta refresh redirect', default=False, group=0):
|
||||
raise ExtractorError('No such content found', expected=True)
|
||||
|
||||
program_info = self._parse_json(self._search_regex(
|
||||
r'var\s+programInfo\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'),
|
||||
video_id)
|
||||
|
||||
season_list = list(program_info.get('seasonList', {}).values())
|
||||
playlist_id = traverse_obj(season_list, 0, 'contentId')
|
||||
if self._yes_playlist(playlist_id, video_id, smuggled_data):
|
||||
return self._extract_playlist(season_list[0], video_id, program_info)
|
||||
|
||||
# In browsers `getMainUrl` request is always issued. Usually this
|
||||
# In browsers `getProgramInfo` request is always issued. Usually this
|
||||
# endpoint gives the same result as the data embedded in the webpage.
|
||||
# If georestricted, there are no embedded data, so an extra request is
|
||||
# necessary to get the error code
|
||||
# If, for some reason, there are no embedded data, we do an extra request.
|
||||
if 'assetId' not in program_info:
|
||||
program_info = self._download_json(
|
||||
'https://www.litv.tv/vod/ajax/getProgramInfo', video_id,
|
||||
query={'contentId': video_id},
|
||||
headers={'Accept': 'application/json'})
|
||||
|
||||
series_id = program_info['seriesId']
|
||||
if self._yes_playlist(series_id, video_id, smuggled_data):
|
||||
playlist_data = self._download_json(
|
||||
'https://www.litv.tv/vod/ajax/getSeriesTree', video_id,
|
||||
query={'seriesId': series_id}, headers={'Accept': 'application/json'})
|
||||
return self._extract_playlist(playlist_data, program_info['contentType'])
|
||||
|
||||
video_data = self._parse_json(self._search_regex(
|
||||
r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
|
||||
webpage, 'video data', default='{}'), video_id)
|
||||
@@ -96,7 +102,7 @@ class LiTVIE(InfoExtractor):
|
||||
'contentType': program_info['contentType'],
|
||||
}
|
||||
video_data = self._download_json(
|
||||
'https://www.litv.tv/vod/getMainUrl', video_id,
|
||||
'https://www.litv.tv/vod/ajax/getMainUrlNoAuth', video_id,
|
||||
data=json.dumps(payload).encode('utf-8'),
|
||||
headers={'Content-Type': 'application/json'})
|
||||
|
||||
|
||||
@@ -80,7 +80,8 @@ class LivestreamIE(InfoExtractor):
|
||||
}]
|
||||
_API_URL_TEMPLATE = 'http://livestream.com/api/accounts/%s/events/%s'
|
||||
|
||||
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
|
||||
def _parse_smil_formats_and_subtitles(
|
||||
self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
|
||||
base_ele = find_xpath_attr(
|
||||
smil, self._xpath_ns('.//meta', namespace), 'name', 'httpBase')
|
||||
base = base_ele.get('content') if base_ele is not None else 'http://livestreamvod-f.akamaihd.net/'
|
||||
@@ -104,7 +105,7 @@ class LivestreamIE(InfoExtractor):
|
||||
'tbr': tbr,
|
||||
'preference': -1000, # Strictly inferior than all other formats?
|
||||
})
|
||||
return formats
|
||||
return formats, {}
|
||||
|
||||
def _extract_video_info(self, video_data):
|
||||
video_id = compat_str(video_data['id'])
|
||||
|
||||
50
yt_dlp/extractor/magellantv.py
Normal file
50
yt_dlp/extractor/magellantv.py
Normal file
@@ -0,0 +1,50 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_age_limit, parse_duration, traverse_obj
|
||||
|
||||
|
||||
class MagellanTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?magellantv\.com/(?:watch|video)/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.magellantv.com/watch/my-dads-on-death-row?type=v',
|
||||
'info_dict': {
|
||||
'id': 'my-dads-on-death-row',
|
||||
'ext': 'mp4',
|
||||
'title': 'My Dad\'s On Death Row',
|
||||
'description': 'md5:33ba23b9f0651fc4537ed19b1d5b0d7a',
|
||||
'duration': 3780.0,
|
||||
'age_limit': 14,
|
||||
'tags': ['Justice', 'Reality', 'United States', 'True Crime'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.magellantv.com/video/james-bulger-the-new-revelations',
|
||||
'info_dict': {
|
||||
'id': 'james-bulger-the-new-revelations',
|
||||
'ext': 'mp4',
|
||||
'title': 'James Bulger: The New Revelations',
|
||||
'description': 'md5:7b97922038bad1d0fe8d0470d8a189f2',
|
||||
'duration': 2640.0,
|
||||
'age_limit': 0,
|
||||
'tags': ['Investigation', 'True Crime', 'Justice', 'Europe'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['reactContext']['video']['detail']
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(data['jwpVideoUrl'], video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(data, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('metadata', 'description', {str}),
|
||||
'duration': ('duration', {parse_duration}),
|
||||
'age_limit': ('ratingCategory', {parse_age_limit}),
|
||||
'tags': ('tags', ..., {str}),
|
||||
}),
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
@@ -140,17 +141,15 @@ class MailRuIE(InfoExtractor):
|
||||
'http://api.video.mail.ru/videos/%s.json?new=1' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
|
||||
headers = {}
|
||||
|
||||
video_key = self._get_cookies('https://my.mail.ru').get('video_key')
|
||||
if video_key:
|
||||
headers['Cookie'] = 'video_key=%s' % video_key.value
|
||||
|
||||
formats = []
|
||||
for f in video_data['videos']:
|
||||
video_url = f.get('url')
|
||||
if not video_url:
|
||||
continue
|
||||
if video_key:
|
||||
self._set_cookie(urllib.parse.urlparse(video_url).hostname, 'video_key', video_key.value)
|
||||
format_id = f.get('key')
|
||||
height = int_or_none(self._search_regex(
|
||||
r'^(\d+)[pP]$', format_id, 'height', default=None)) if format_id else None
|
||||
@@ -158,7 +157,6 @@ class MailRuIE(InfoExtractor):
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
'height': height,
|
||||
'http_headers': headers,
|
||||
})
|
||||
|
||||
meta_data = video_data['meta']
|
||||
|
||||
@@ -17,11 +17,12 @@ class MassengeschmackTVIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
'url': 'https://massengeschmack.tv/play/fktv202',
|
||||
'md5': 'a9e054db9c2b5a08f0a0527cc201e8d3',
|
||||
'md5': '9996f314994a49fefe5f39aa1b07ae21',
|
||||
'info_dict': {
|
||||
'id': 'fktv202',
|
||||
'ext': 'mp4',
|
||||
'title': 'Fernsehkritik-TV - Folge 202',
|
||||
'title': 'Fernsehkritik-TV #202',
|
||||
'thumbnail': 'https://cache.massengeschmack.tv/img/mag/fktv202.jpg'
|
||||
},
|
||||
}
|
||||
|
||||
@@ -29,9 +30,6 @@ class MassengeschmackTVIE(InfoExtractor):
|
||||
episode = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, episode)
|
||||
title = clean_html(self._html_search_regex(
|
||||
'<h3>([^<]+)</h3>', webpage, 'title'))
|
||||
thumbnail = self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False)
|
||||
sources = self._parse_json(self._search_regex(r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'), episode, js_to_json)
|
||||
|
||||
formats = []
|
||||
@@ -67,7 +65,8 @@ class MassengeschmackTVIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': episode,
|
||||
'title': title,
|
||||
'title': clean_html(self._html_search_regex(
|
||||
r'<span[^>]+\bid=["\']clip-title["\'][^>]*>([^<]+)', webpage, 'title', fatal=False)),
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
'thumbnail': self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False),
|
||||
}
|
||||
|
||||
@@ -81,10 +81,24 @@ class MediaiteIE(InfoExtractor):
|
||||
'upload_date': '20210930',
|
||||
},
|
||||
'params': {'skip_download': True}
|
||||
}, {
|
||||
'url': 'https://www.mediaite.com/politics/i-cant-read-it-fast-enough-while-defending-trump-larry-kudlow-overwhelmed-by-volume-of-ex-presidents-legal-troubles/',
|
||||
'info_dict': {
|
||||
'id': 'E6EhDX5z',
|
||||
'ext': 'mp4',
|
||||
'title': 'Fox Business Network - 4:00 PM - 5:00 PM - 1:39:42 pm - 1:42:20 pm',
|
||||
'description': '',
|
||||
'thumbnail': 'https://cdn.jwplayer.com/v2/media/E6EhDX5z/poster.jpg?width=720',
|
||||
'duration': 157,
|
||||
'timestamp': 1691015535,
|
||||
'upload_date': '20230802',
|
||||
},
|
||||
'params': {'skip_download': True}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
webpage = self._download_webpage(url, None)
|
||||
id = self._search_regex(r'data-video-id\s?=\s?\"([^\"]+)\"', webpage, 'id')
|
||||
data_json = self._download_json(f'https://cdn.jwplayer.com/v2/media/{id}', id)
|
||||
video_id = self._search_regex(
|
||||
[r'"https://cdn\.jwplayer\.com/players/(\w+)', r'data-video-id\s*=\s*\"([^\"]+)\"'], webpage, 'id')
|
||||
data_json = self._download_json(f'https://cdn.jwplayer.com/v2/media/{video_id}', video_id)
|
||||
return self._parse_jwplayer_data(data_json)
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
from ..utils import (
|
||||
unified_strdate
|
||||
ExtractorError,
|
||||
traverse_obj,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
@@ -15,7 +18,7 @@ class MediaKlikkIE(InfoExtractor):
|
||||
(?P<id>[^/#?_]+)'''
|
||||
|
||||
_TESTS = [{
|
||||
# mediaklikk. date in html.
|
||||
# (old) mediaklikk. date in html.
|
||||
'url': 'https://mediaklikk.hu/video/hazajaro-delnyugat-bacska-a-duna-menten-palankatol-doroszloig/',
|
||||
'info_dict': {
|
||||
'id': '4754129',
|
||||
@@ -23,9 +26,21 @@ class MediaKlikkIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20210901',
|
||||
'thumbnail': 'http://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg'
|
||||
},
|
||||
'skip': 'Webpage redirects to 404 page',
|
||||
}, {
|
||||
# mediaklikk. date in html.
|
||||
'url': 'https://mediaklikk.hu/video/hazajaro-fabova-hegyseg-kishont-koronaja/',
|
||||
'info_dict': {
|
||||
'id': '6696133',
|
||||
'title': 'Hazajáró, Fabova-hegység - Kishont koronája',
|
||||
'display_id': 'hazajaro-fabova-hegyseg-kishont-koronaja',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20230903',
|
||||
'thumbnail': 'https://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg'
|
||||
}
|
||||
}, {
|
||||
# m4sport
|
||||
# (old) m4sport
|
||||
'url': 'https://m4sport.hu/video/2021/08/30/gyemant-liga-parizs/',
|
||||
'info_dict': {
|
||||
'id': '4754999',
|
||||
@@ -33,6 +48,18 @@ class MediaKlikkIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20210830',
|
||||
'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/08/vlcsnap-2021-08-30-18h21m20s10-1024x576.jpg'
|
||||
},
|
||||
'skip': 'Webpage redirects to 404 page',
|
||||
}, {
|
||||
# m4sport
|
||||
'url': 'https://m4sport.hu/sportkozvetitesek/video/2023/09/08/atletika-gyemant-liga-brusszel/',
|
||||
'info_dict': {
|
||||
'id': '6711136',
|
||||
'title': 'Atlétika – Gyémánt Liga, Brüsszel',
|
||||
'display_id': 'atletika-gyemant-liga-brusszel',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20230908',
|
||||
'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-08-22h43m18s691.jpg'
|
||||
}
|
||||
}, {
|
||||
# m4sport with *video/ url and no date
|
||||
@@ -40,20 +67,33 @@ class MediaKlikkIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '4492099',
|
||||
'title': 'Real Madrid - Chelsea 1-1',
|
||||
'display_id': 'real-madrid-chelsea-1-1',
|
||||
'ext': 'mp4',
|
||||
'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png'
|
||||
'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png'
|
||||
}
|
||||
}, {
|
||||
# hirado
|
||||
# (old) hirado
|
||||
'url': 'https://hirado.hu/videok/felteteleket-szabott-a-fovaros/',
|
||||
'info_dict': {
|
||||
'id': '4760120',
|
||||
'title': 'Feltételeket szabott a főváros',
|
||||
'ext': 'mp4',
|
||||
'thumbnail': 'http://hirado.hu/wp-content/uploads/sites/4/2021/09/vlcsnap-2021-09-01-20h20m37s165.jpg'
|
||||
},
|
||||
'skip': 'Webpage redirects to video list page',
|
||||
}, {
|
||||
# hirado
|
||||
'url': 'https://hirado.hu/belfold/video/2023/09/11/marad-az-eves-elszamolas-a-napelemekre-beruhazo-csaladoknal',
|
||||
'info_dict': {
|
||||
'id': '6716068',
|
||||
'title': 'Marad az éves elszámolás a napelemekre beruházó családoknál',
|
||||
'display_id': 'marad-az-eves-elszamolas-a-napelemekre-beruhazo-csaladoknal',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20230911',
|
||||
'thumbnail': 'https://hirado.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-11-09h16m09s882.jpg'
|
||||
}
|
||||
}, {
|
||||
# petofilive
|
||||
# (old) petofilive
|
||||
'url': 'https://petofilive.hu/video/2021/06/07/tha-shudras-az-akusztikban/',
|
||||
'info_dict': {
|
||||
'id': '4571948',
|
||||
@@ -61,6 +101,18 @@ class MediaKlikkIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20210607',
|
||||
'thumbnail': 'http://petofilive.hu/wp-content/uploads/sites/4/2021/06/vlcsnap-2021-06-07-22h14m23s915-1024x576.jpg'
|
||||
},
|
||||
'skip': 'Webpage redirects to empty page',
|
||||
}, {
|
||||
# petofilive
|
||||
'url': 'https://petofilive.hu/video/2023/09/09/futball-fesztival-a-margitszigeten/',
|
||||
'info_dict': {
|
||||
'id': '6713233',
|
||||
'title': 'Futball Fesztivál a Margitszigeten',
|
||||
'display_id': 'futball-fesztival-a-margitszigeten',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20230909',
|
||||
'thumbnail': 'https://petofilive.hu/wp-content/uploads/sites/4/2023/09/Clipboard11-2.jpg'
|
||||
}
|
||||
}]
|
||||
|
||||
@@ -84,8 +136,12 @@ class MediaKlikkIE(InfoExtractor):
|
||||
|
||||
player_data['video'] = player_data.pop('token')
|
||||
player_page = self._download_webpage('https://player.mediaklikk.hu/playernew/player.php', video_id, query=player_data)
|
||||
playlist_url = self._proto_relative_url(compat_urllib_parse_unquote(
|
||||
self._html_search_regex(r'\"file\":\s*\"(\\?/\\?/.*playlist\.m3u8)\"', player_page, 'playlist_url')).replace('\\/', '/'))
|
||||
player_json = self._search_json(
|
||||
r'\bpl\.setup\s*\(', player_page, 'player json', video_id, end_pattern=r'\);')
|
||||
playlist_url = traverse_obj(
|
||||
player_json, ('playlist', lambda _, v: v['type'] == 'hls', 'file', {url_or_none}), get_all=False)
|
||||
if not playlist_url:
|
||||
raise ExtractorError('Unable to extract playlist url')
|
||||
|
||||
formats = self._extract_wowza_formats(
|
||||
playlist_url, video_id, skip_protocols=['f4m', 'smil', 'dash'])
|
||||
|
||||
@@ -127,7 +127,8 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'skip': 'Dead link',
|
||||
}, {
|
||||
# WittyTV embed
|
||||
'url': 'https://www.wittytv.it/mauriziocostanzoshow/ultima-puntata-venerdi-25-novembre/',
|
||||
@@ -154,10 +155,12 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
}
|
||||
}]
|
||||
|
||||
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
|
||||
def _parse_smil_formats_and_subtitles(
|
||||
self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
|
||||
for video in smil.findall(self._xpath_ns('.//video', namespace)):
|
||||
video.attrib['src'] = re.sub(r'(https?://vod05)t(-mediaset-it\.akamaized\.net/.+?.mpd)\?.+', r'\1\2', video.attrib['src'])
|
||||
return super(MediasetIE, self)._parse_smil_formats(smil, smil_url, video_id, namespace, f4m_params, transform_rtmp_url)
|
||||
return super(MediasetIE, self)._parse_smil_formats_and_subtitles(
|
||||
smil, smil_url, video_id, namespace, f4m_params, transform_rtmp_url)
|
||||
|
||||
def _check_drm_formats(self, tp_formats, video_id):
|
||||
has_nondrm, drm_manifest = False, ''
|
||||
|
||||
@@ -171,7 +171,7 @@ class MediasiteIE(InfoExtractor):
|
||||
query = mobj.group('query')
|
||||
|
||||
webpage, urlh = self._download_webpage_handle(url, resource_id) # XXX: add UrlReferrer?
|
||||
redirect_url = urlh.geturl()
|
||||
redirect_url = urlh.url
|
||||
|
||||
# XXX: might have also extracted UrlReferrer and QueryString from the html
|
||||
service_path = compat_urlparse.urljoin(redirect_url, self._html_search_regex(
|
||||
|
||||
@@ -14,7 +14,7 @@ class MediaStreamBaseIE(InfoExtractor):
|
||||
_BASE_URL_RE = r'https?://mdstrm\.com/(?:embed|live-stream)'
|
||||
|
||||
def _extract_mediastream_urls(self, webpage):
|
||||
yield from traverse_obj(list(self._yield_json_ld(webpage, None)), (
|
||||
yield from traverse_obj(list(self._yield_json_ld(webpage, None, fatal=False)), (
|
||||
lambda _, v: v['@type'] == 'VideoObject', ('embedUrl', 'contentUrl'),
|
||||
{lambda x: x if re.match(rf'{self._BASE_URL_RE}/\w+', x) else None}))
|
||||
|
||||
@@ -106,8 +106,12 @@ class MediaStreamIE(MediaStreamBaseIE):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if 'Debido a tu ubicación no puedes ver el contenido' in webpage:
|
||||
self.raise_geo_restricted()
|
||||
for message in [
|
||||
'Debido a tu ubicación no puedes ver el contenido',
|
||||
'You are not allowed to watch this video: Geo Fencing Restriction'
|
||||
]:
|
||||
if message in webpage:
|
||||
self.raise_geo_restricted()
|
||||
|
||||
player_config = self._search_json(r'window\.MDSTRM\.OPTIONS\s*=', webpage, 'metadata', video_id)
|
||||
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
get_element_by_class,
|
||||
get_element_html_by_id,
|
||||
HEADRequest,
|
||||
parse_qs,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
@@ -160,5 +160,5 @@ class MegaTVComEmbedIE(MegaTVComBaseIE):
|
||||
canonical_url = self._request_webpage(
|
||||
HEADRequest(canonical_url), video_id,
|
||||
note='Resolve canonical URL',
|
||||
errnote='Could not resolve canonical URL').geturl()
|
||||
errnote='Could not resolve canonical URL').url
|
||||
return self.url_result(canonical_url, MegaTVComIE.ie_key(), video_id)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user