mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-12-24 00:49:06 +00:00
Merge branch 'master' into yt-live-from-start-range
This commit is contained in:
@@ -138,6 +138,10 @@ from .ard import (
|
||||
ARDMediathekCollectionIE,
|
||||
ARDIE,
|
||||
)
|
||||
from .art19 import (
|
||||
Art19IE,
|
||||
Art19ShowIE,
|
||||
)
|
||||
from .arte import (
|
||||
ArteTVIE,
|
||||
ArteTVEmbedIE,
|
||||
@@ -253,6 +257,7 @@ from .blogger import BloggerIE
|
||||
from .bloomberg import BloombergIE
|
||||
from .bokecc import BokeCCIE
|
||||
from .bongacams import BongaCamsIE
|
||||
from .boosty import BoostyIE
|
||||
from .bostonglobe import BostonGlobeIE
|
||||
from .box import BoxIE
|
||||
from .boxcast import BoxCastVideoIE
|
||||
@@ -369,11 +374,11 @@ from .clippit import ClippitIE
|
||||
from .cliprs import ClipRsIE
|
||||
from .closertotruth import CloserToTruthIE
|
||||
from .cloudflarestream import CloudflareStreamIE
|
||||
from .cloudycdn import CloudyCDNIE
|
||||
from .clubic import ClubicIE
|
||||
from .clyp import ClypIE
|
||||
from .cmt import CMTIE
|
||||
from .cnbc import (
|
||||
CNBCIE,
|
||||
CNBCVideoIE,
|
||||
)
|
||||
from .cnn import (
|
||||
@@ -564,6 +569,7 @@ from .eroprofile import (
|
||||
EroProfileIE,
|
||||
EroProfileAlbumIE,
|
||||
)
|
||||
from .err import ERRJupiterIE
|
||||
from .ertgr import (
|
||||
ERTFlixCodenameIE,
|
||||
ERTFlixIE,
|
||||
@@ -588,6 +594,7 @@ from .facebook import (
|
||||
FacebookPluginsVideoIE,
|
||||
FacebookRedirectURLIE,
|
||||
FacebookReelIE,
|
||||
FacebookAdsIE,
|
||||
)
|
||||
from .fancode import (
|
||||
FancodeVodIE,
|
||||
@@ -610,6 +617,7 @@ from .filmon import (
|
||||
from .filmweb import FilmwebIE
|
||||
from .firsttv import FirstTVIE
|
||||
from .fivetv import FiveTVIE
|
||||
from .flextv import FlexTVIE
|
||||
from .flickr import FlickrIE
|
||||
from .floatplane import (
|
||||
FloatplaneIE,
|
||||
@@ -1000,6 +1008,11 @@ from .lrt import (
|
||||
LRTVODIE,
|
||||
LRTStreamIE
|
||||
)
|
||||
from .lsm import (
|
||||
LSMLREmbedIE,
|
||||
LSMLTVEmbedIE,
|
||||
LSMReplayIE
|
||||
)
|
||||
from .lumni import (
|
||||
LumniIE
|
||||
)
|
||||
@@ -1111,6 +1124,7 @@ from .motherless import (
|
||||
MotherlessIE,
|
||||
MotherlessGroupIE,
|
||||
MotherlessGalleryIE,
|
||||
MotherlessUploaderIE,
|
||||
)
|
||||
from .motorsport import MotorsportIE
|
||||
from .moviepilot import MoviepilotIE
|
||||
@@ -1137,6 +1151,11 @@ from .musicdex import (
|
||||
MusicdexArtistIE,
|
||||
MusicdexPlaylistIE,
|
||||
)
|
||||
from .mx3 import (
|
||||
Mx3IE,
|
||||
Mx3NeoIE,
|
||||
Mx3VolksmusikIE,
|
||||
)
|
||||
from .mxplayer import (
|
||||
MxplayerIE,
|
||||
MxplayerShowIE,
|
||||
@@ -1229,7 +1248,10 @@ from .nexx import (
|
||||
NexxIE,
|
||||
NexxEmbedIE,
|
||||
)
|
||||
from .nfb import NFBIE
|
||||
from .nfb import (
|
||||
NFBIE,
|
||||
NFBSeriesIE,
|
||||
)
|
||||
from .nfhsnetwork import NFHSNetworkIE
|
||||
from .nfl import (
|
||||
NFLIE,
|
||||
@@ -1266,6 +1288,7 @@ from .niconico import (
|
||||
NicovideoTagURLIE,
|
||||
NiconicoLiveIE,
|
||||
)
|
||||
from .ninaprotocol import NinaProtocolIE
|
||||
from .ninecninemedia import (
|
||||
NineCNineMediaIE,
|
||||
CPTwentyFourIE,
|
||||
@@ -1330,6 +1353,12 @@ from .nytimes import (
|
||||
NYTimesIE,
|
||||
NYTimesArticleIE,
|
||||
NYTimesCookingIE,
|
||||
NYTimesCookingRecipeIE,
|
||||
)
|
||||
from .nuum import (
|
||||
NuumLiveIE,
|
||||
NuumTabIE,
|
||||
NuumMediaIE,
|
||||
)
|
||||
from .nuvid import NuvidIE
|
||||
from .nzherald import NZHeraldIE
|
||||
@@ -1372,6 +1401,7 @@ from .ora import OraTVIE
|
||||
from .orf import (
|
||||
ORFTVthekIE,
|
||||
ORFFM4StoryIE,
|
||||
ORFONIE,
|
||||
ORFRadioIE,
|
||||
ORFPodcastIE,
|
||||
ORFIPTVIE,
|
||||
@@ -1496,7 +1526,7 @@ from .puhutv import (
|
||||
PuhuTVSerieIE,
|
||||
)
|
||||
from .pr0gramm import Pr0grammIE
|
||||
from .prankcast import PrankCastIE
|
||||
from .prankcast import PrankCastIE, PrankCastPostIE
|
||||
from .premiershiprugby import PremiershipRugbyIE
|
||||
from .presstv import PressTVIE
|
||||
from .projectveritas import ProjectVeritasIE
|
||||
@@ -1593,6 +1623,7 @@ from .redbulltv import (
|
||||
RedBullIE,
|
||||
)
|
||||
from .reddit import RedditIE
|
||||
from .redge import RedCDNLivxIE
|
||||
from .redgifs import (
|
||||
RedGifsIE,
|
||||
RedGifsSearchIE,
|
||||
@@ -1727,6 +1758,7 @@ from .scte import (
|
||||
)
|
||||
from .scrolller import ScrolllerIE
|
||||
from .seeker import SeekerIE
|
||||
from .sejmpl import SejmIE
|
||||
from .senalcolombia import SenalColombiaLiveIE
|
||||
from .senategov import SenateISVPIE, SenateGovIE
|
||||
from .sendtonews import SendtoNewsIE
|
||||
@@ -2289,11 +2321,6 @@ from .washingtonpost import (
|
||||
WashingtonPostIE,
|
||||
WashingtonPostArticleIE,
|
||||
)
|
||||
from .wasdtv import (
|
||||
WASDTVStreamIE,
|
||||
WASDTVRecordIE,
|
||||
WASDTVClipIE,
|
||||
)
|
||||
from .wat import WatIE
|
||||
from .wdr import (
|
||||
WDRIE,
|
||||
@@ -2472,6 +2499,7 @@ from .zee5 import (
|
||||
Zee5SeriesIE,
|
||||
)
|
||||
from .zeenews import ZeeNewsIE
|
||||
from .zetland import ZetlandDKArticleIE
|
||||
from .zhihu import ZhihuIE
|
||||
from .zingmp3 import (
|
||||
ZingMp3IE,
|
||||
|
||||
@@ -3,6 +3,7 @@ import binascii
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
|
||||
@@ -17,6 +18,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
long_to_bytes,
|
||||
parse_iso8601,
|
||||
pkcs1pad,
|
||||
strip_or_none,
|
||||
str_or_none,
|
||||
@@ -185,7 +187,10 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
|
||||
user = options['user']
|
||||
if not user.get('hasAccess'):
|
||||
self.raise_login_required()
|
||||
start_date = traverse_obj(options, ('video', 'startDate', {str}))
|
||||
if (parse_iso8601(start_date) or 0) > time.time():
|
||||
raise ExtractorError(f'This video is not available yet. Release date: {start_date}', expected=True)
|
||||
self.raise_login_required('This video requires a subscription', method='password')
|
||||
|
||||
token = self._download_json(
|
||||
user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'),
|
||||
@@ -267,6 +272,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
f['language'] = 'de'
|
||||
formats.extend(m3u8_formats)
|
||||
|
||||
if not formats:
|
||||
self.raise_login_required('This video requires a subscription', method='password')
|
||||
|
||||
video = (self._download_json(
|
||||
self._API_BASE_URL + 'video/%s' % video_id, video_id,
|
||||
'Downloading additional video metadata', fatal=False) or {}).get('video') or {}
|
||||
|
||||
@@ -22,7 +22,7 @@ class AltCensoredIE(InfoExtractor):
|
||||
'title': "QUELLES SONT LES CONSÉQUENCES DE L'HYPERSEXUALISATION DE LA SOCIÉTÉ ?",
|
||||
'display_id': 'k0srjLSkga8.webm',
|
||||
'release_date': '20180403',
|
||||
'creator': 'Virginie Vota',
|
||||
'creators': ['Virginie Vota'],
|
||||
'release_year': 2018,
|
||||
'upload_date': '20230318',
|
||||
'uploader': 'admin@altcensored.com',
|
||||
@@ -32,7 +32,7 @@ class AltCensoredIE(InfoExtractor):
|
||||
'duration': 926.09,
|
||||
'thumbnail': 'https://archive.org/download/youtube-k0srjLSkga8/youtube-k0srjLSkga8.thumbs/k0srjLSkga8_000925.jpg',
|
||||
'view_count': int,
|
||||
'categories': ['News & Politics'],
|
||||
'categories': ['News & Politics'], # FIXME
|
||||
}
|
||||
}]
|
||||
|
||||
@@ -62,14 +62,21 @@ class AltCensoredChannelIE(InfoExtractor):
|
||||
'title': 'Virginie Vota',
|
||||
'id': 'UCFPTO55xxHqFqkzRZHu4kcw',
|
||||
},
|
||||
'playlist_count': 91
|
||||
'playlist_count': 85,
|
||||
}, {
|
||||
'url': 'https://altcensored.com/channel/UC9CcJ96HKMWn0LZlcxlpFTw',
|
||||
'info_dict': {
|
||||
'title': 'yukikaze775',
|
||||
'id': 'UC9CcJ96HKMWn0LZlcxlpFTw',
|
||||
},
|
||||
'playlist_count': 4
|
||||
'playlist_count': 4,
|
||||
}, {
|
||||
'url': 'https://altcensored.com/channel/UCfYbb7nga6-icsFWWgS-kWw',
|
||||
'info_dict': {
|
||||
'title': 'Mister Metokur',
|
||||
'id': 'UCfYbb7nga6-icsFWWgS-kWw',
|
||||
},
|
||||
'playlist_count': 121,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -78,7 +85,7 @@ class AltCensoredChannelIE(InfoExtractor):
|
||||
url, channel_id, 'Download channel webpage', 'Unable to get channel webpage')
|
||||
title = self._html_search_meta('altcen_title', webpage, 'title', fatal=False)
|
||||
page_count = int_or_none(self._html_search_regex(
|
||||
r'<a[^>]+href="/channel/\w+/page/(\d+)">(?:\1)</a>',
|
||||
r'<a[^>]+href="/channel/[\w-]+/page/(\d+)">(?:\1)</a>',
|
||||
webpage, 'page count', default='1'))
|
||||
|
||||
def page_func(page_num):
|
||||
|
||||
@@ -78,14 +78,14 @@ class Ant1NewsGrArticleIE(AntennaBaseIE):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ant1news.gr/afieromata/article/549468/o-tzeims-mpont-sta-meteora-oi-apeiles-kai-o-xesikomos-ton-kalogeron',
|
||||
'md5': '294f18331bb516539d72d85a82887dcc',
|
||||
'md5': '57eb8d12181f0fa2b14b0b138e1de9b6',
|
||||
'info_dict': {
|
||||
'id': '_xvg/m_cmbatw=',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:a93e8ecf2e4073bfdffcb38f59945411',
|
||||
'timestamp': 1603092840,
|
||||
'upload_date': '20201019',
|
||||
'thumbnail': 'https://ant1media.azureedge.net/imgHandler/640/756206d2-d640-40e2-b201-3555abdfc0db.jpg',
|
||||
'timestamp': 1666166520,
|
||||
'upload_date': '20221019',
|
||||
'thumbnail': 'https://ant1media.azureedge.net/imgHandler/1920/756206d2-d640-40e2-b201-3555abdfc0db.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://ant1news.gr/Society/article/620286/symmoria-anilikon-dikigoros-thymaton-ithelan-na-toys-apoteleiosoyn',
|
||||
@@ -117,7 +117,7 @@ class Ant1NewsGrEmbedIE(AntennaBaseIE):
|
||||
_BASE_PLAYER_URL_RE = r'(?:https?:)?//(?:[a-zA-Z0-9\-]+\.)?(?:antenna|ant1news)\.gr/templates/pages/player'
|
||||
_VALID_URL = rf'{_BASE_PLAYER_URL_RE}\?([^#]+&)?cid=(?P<id>[^#&]+)'
|
||||
_EMBED_REGEX = [rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{_BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+)(?P=_q1)']
|
||||
_API_PATH = '/news/templates/data/jsonPlayer'
|
||||
_API_PATH = '/templates/data/jsonPlayer'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.antenna.gr/templates/pages/player?cid=3f_li_c_az_jw_y_u=&w=670&h=377',
|
||||
|
||||
@@ -300,7 +300,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
is_logged_in = bool(self._get_cookies('https://archive.org').get('logged-in-sig'))
|
||||
if extension in KNOWN_EXTENSIONS and (not f.get('private') or is_logged_in):
|
||||
entry['formats'].append({
|
||||
'url': 'https://archive.org/download/' + identifier + '/' + f['name'],
|
||||
'url': 'https://archive.org/download/' + identifier + '/' + urllib.parse.quote(f['name']),
|
||||
'format': f.get('format'),
|
||||
'width': int_or_none(f.get('width')),
|
||||
'height': int_or_none(f.get('height')),
|
||||
|
||||
@@ -8,6 +8,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
jwt_decode_hs256,
|
||||
make_archive_id,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
@@ -238,6 +239,7 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
(?P<id>[a-zA-Z0-9]+)
|
||||
/?(?:[?#]|$)'''
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
_TOKEN_URL = 'https://sso.ardmediathek.de/sso/token'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ardmediathek.de/video/filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen/Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
|
||||
@@ -359,12 +361,27 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
query = {'embedded': 'false', 'mcV6': 'true'}
|
||||
headers = {}
|
||||
|
||||
if self._get_cookies(self._TOKEN_URL).get('ams'):
|
||||
token = self._download_json(
|
||||
self._TOKEN_URL, display_id, 'Fetching token for age verification',
|
||||
'Unable to fetch age verification token', fatal=False)
|
||||
id_token = traverse_obj(token, ('idToken', {str}))
|
||||
decoded_token = traverse_obj(id_token, ({jwt_decode_hs256}, {dict}))
|
||||
user_id = traverse_obj(decoded_token, (('user_id', 'sub'), {str}), get_all=False)
|
||||
if not user_id:
|
||||
self.report_warning('Unable to extract token, continuing without authentication')
|
||||
else:
|
||||
headers['x-authorization'] = f'Bearer {id_token}'
|
||||
query['userId'] = user_id
|
||||
if decoded_token.get('age_rating') != 18:
|
||||
self.report_warning('Account is not verified as 18+; video may be unavailable')
|
||||
|
||||
page_data = self._download_json(
|
||||
f'https://api.ardmediathek.de/page-gateway/pages/ard/item/{display_id}', display_id, query={
|
||||
'embedded': 'false',
|
||||
'mcV6': 'true',
|
||||
})
|
||||
f'https://api.ardmediathek.de/page-gateway/pages/ard/item/{display_id}',
|
||||
display_id, query=query, headers=headers)
|
||||
|
||||
# For user convenience we use the old contentId instead of the longer crid
|
||||
# Ref: https://github.com/yt-dlp/yt-dlp/issues/8731#issuecomment-1874398283
|
||||
@@ -383,7 +400,7 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
media_data = traverse_obj(player_data, ('mediaCollection', 'embedded', {dict}))
|
||||
|
||||
if player_data.get('blockedByFsk'):
|
||||
self.raise_no_formats('This video is only available after 22:00', expected=True)
|
||||
self.raise_login_required('This video is only available for age verified users or after 22:00')
|
||||
|
||||
formats = []
|
||||
subtitles = {}
|
||||
|
||||
303
yt_dlp/extractor/art19.py
Normal file
303
yt_dlp/extractor/art19.py
Normal file
@@ -0,0 +1,303 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import float_or_none, int_or_none, parse_iso8601, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class Art19IE(InfoExtractor):
    # Extractor for a single ART19 episode. Two URL shapes are accepted:
    # the art19.com show/episode page and the rss.art19.com direct .mp3 link.
    # Both capture the episode UUID as the ``id`` group.
    _UUID_REGEX = r'[\da-f]{8}-?[\da-f]{4}-?[\da-f]{4}-?[\da-f]{4}-?[\da-f]{12}'
    _VALID_URL = [
        rf'https?://(?:www\.)?art19\.com/shows/[^/#?]+/episodes/(?P<id>{_UUID_REGEX})',
        rf'https?://rss\.art19\.com/episodes/(?P<id>{_UUID_REGEX})\.mp3',
    ]
    # Only the art19.com episode page form is looked for in <iframe> sources
    _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL[0]})']

    _TESTS = [{
        'url': 'https://rss.art19.com/episodes/5ba1413c-48b8-472b-9cc3-cfd952340bdb.mp3',
        'info_dict': {
            'id': '5ba1413c-48b8-472b-9cc3-cfd952340bdb',
            'ext': 'mp3',
            'title': 'Why Did DeSantis Drop Out?',
            'series': 'The Daily Briefing',
            'release_timestamp': 1705941275,
            'description': 'md5:da38961da4a3f7e419471365e3c6b49f',
            'episode': 'Episode 582',
            'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
            'series_id': 'ed52a0ab-08b1-4def-8afc-549e4d93296d',
            'upload_date': '20240122',
            'timestamp': 1705940815,
            'episode_number': 582,
            'modified_date': '20240122',
            'episode_id': '5ba1413c-48b8-472b-9cc3-cfd952340bdb',
            'modified_timestamp': 1705941275,
            'release_date': '20240122',
            'duration': 527.4,
        },
    }, {
        'url': 'https://art19.com/shows/scamfluencers/episodes/8319b776-4153-4d22-8630-631f204a03dd',
        'info_dict': {
            'id': '8319b776-4153-4d22-8630-631f204a03dd',
            'ext': 'mp3',
            'title': 'Martha Stewart: The Homemaker Hustler Part 2',
            'modified_date': '20240116',
            'upload_date': '20240105',
            'modified_timestamp': 1705435802,
            'episode_id': '8319b776-4153-4d22-8630-631f204a03dd',
            'series_id': 'd3c9b8ca-26b3-42f4-9bd8-21d1a9031e75',
            'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
            'description': 'md5:4aa7cfd1358dc57e729835bc208d7893',
            'release_timestamp': 1705305660,
            'release_date': '20240115',
            'timestamp': 1704481536,
            'episode_number': 88,
            'series': 'Scamfluencers',
            'duration': 2588.37501,
            'episode': 'Episode 88',
        },
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://www.nu.nl/formule-1/6291456/verstappen-wordt-een-synoniem-voor-formule-1.html',
        'info_dict': {
            'id': '7d42626a-7301-47db-bb8a-3b6f054d77d7',
            'ext': 'mp3',
            'title': "'Verstappen wordt een synoniem voor Formule 1'",
            'season': 'Seizoen 6',
            'description': 'md5:39a7159a31c4cda312b2e893bdd5c071',
            'episode_id': '7d42626a-7301-47db-bb8a-3b6f054d77d7',
            'duration': 3061.82111,
            'series_id': '93f4e113-2a60-4609-a564-755058fa40d8',
            'release_date': '20231126',
            'modified_timestamp': 1701156004,
            'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
            'season_number': 6,
            'episode_number': 52,
            'modified_date': '20231128',
            'upload_date': '20231126',
            'timestamp': 1701025981,
            'season_id': '36097c1e-7455-490d-a2fe-e2f10b4d5f26',
            'series': 'De Boordradio',
            'release_timestamp': 1701026308,
            'episode': 'Episode 52',
        },
    }, {
        'url': 'https://www.wishtv.com/podcast-episode/larry-bucshon-announces-retirement-from-congress/',
        'info_dict': {
            'id': '8da368bd-08d1-46d0-afaa-c134a4af7dc0',
            'ext': 'mp3',
            'title': 'Larry Bucshon announces retirement from congress',
            'upload_date': '20240115',
            'episode_number': 148,
            'episode': 'Episode 148',
            'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
            'release_date': '20240115',
            'timestamp': 1705328205,
            'release_timestamp': 1705329275,
            'series': 'All INdiana Politics',
            'modified_date': '20240117',
            'modified_timestamp': 1705458901,
            'series_id': 'c4af6c27-b10f-4ff2-9f84-0f407df86ff1',
            'episode_id': '8da368bd-08d1-46d0-afaa-c134a4af7dc0',
            'description': 'md5:53b5239e4d14973a87125c217c255b2a',
            'duration': 1256.18848,
        },
    }]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield embedded episode URLs found in ``webpage``.

        The inherited results come first; after that, every
        ``art19-web-player`` <div> that carries a ``data-episode-id``
        UUID is turned into its rss.art19.com .mp3 URL.
        """
        yield from super()._extract_embed_urls(url, webpage)

        player_div_re = rf'<div[^>]+\bclass=[\'"][^\'"]*art19-web-player[^\'"]*[\'"][^>]+\bdata-episode-id=[\'"]({cls._UUID_REGEX})[\'"]'
        for mobj in re.finditer(player_div_re, webpage):
            yield f'https://rss.art19.com/episodes/{mobj.group(1)}.mp3'

    def _real_extract(self, url):
        """Build the info dict for one episode.

        Two JSON documents are requested, both non-fatally: the player
        metadata from art19.com and the RSS metadata from rss.art19.com.
        Fields taken from the RSS metadata override the same fields taken
        from the player metadata.
        """
        episode_id = self._match_id(url)

        player_metadata = self._download_json(
            f'https://art19.com/episodes/{episode_id}', episode_id,
            note='Downloading player metadata', fatal=False,
            headers={'Accept': 'application/vnd.art19.v0+json'})
        rss_metadata = self._download_json(
            f'https://rss.art19.com/episodes/{episode_id}.json', episode_id, fatal=False,
            note='Downloading RSS metadata')

        # The direct .mp3 link is always offered, whatever the metadata says
        formats = [{
            'format_id': 'direct',
            'url': f'https://rss.art19.com/episodes/{episode_id}.mp3',
            'vcodec': 'none',
            'acodec': 'mp3',
        }]
        media_items = traverse_obj(rss_metadata, ('content', 'media', {dict.items}, ...))
        for media_name, media_info in media_items:
            # 'waveform_bin' entries are never turned into formats
            media_url = None
            if media_name != 'waveform_bin':
                media_url = traverse_obj(media_info, ('url', {url_or_none}))
            if not media_url:
                continue
            formats.append({
                'format_id': media_name,
                'url': media_url,
                'vcodec': 'none',
                'acodec': media_name,
                # 'ogg' is ranked one step below every other listed medium
                'quality': {'ogg': -2}.get(media_name, -1),
            })

        info = {
            'id': episode_id,
            'formats': formats,
        }
        info.update(traverse_obj(player_metadata, ('episode', {
            'title': ('title', {str}),
            'description': ('description_plain', {str}),
            'episode_id': ('id', {str}),
            'episode_number': ('episode_number', {int_or_none}),
            'season_id': ('season_id', {str}),
            'series_id': ('series_id', {str}),
            'timestamp': ('created_at', {parse_iso8601}),
            'release_timestamp': ('released_at', {parse_iso8601}),
            'modified_timestamp': ('updated_at', {parse_iso8601}),
        })))
        # Applied last so that RSS values win over player values
        info.update(traverse_obj(rss_metadata, ('content', {
            'title': ('episode_title', {str}),
            'description': ('episode_description_plain', {str}),
            'episode_id': ('episode_id', {str}),
            'episode_number': ('episode_number', {int_or_none}),
            'season': ('season_title', {str}),
            'season_id': ('season_id', {str}),
            'season_number': ('season_number', {int_or_none}),
            'series': ('series_title', {str}),
            'series_id': ('series_id', {str}),
            'thumbnail': ('cover_image', {url_or_none}),
            'duration': ('duration', {float_or_none}),
        })))
        return info
|
||||
|
||||
|
||||
class Art19ShowIE(InfoExtractor):
    # Extractor for a whole ART19 show (series), returned as a playlist of
    # episodes. The show is addressed either through art19.com/shows/<id>
    # (optionally followed by /embed) or through rss.art19.com/<id>.
    _VALID_URL_BASE = r'https?://(?:www\.)?art19\.com/shows/(?P<id>[\w-]+)(?:/embed)?/?'
    _VALID_URL = [
        rf'{_VALID_URL_BASE}(?:$|[#?])',
        r'https?://rss\.art19\.com/(?P<id>[\w-]+)/?(?:$|[#?])',
    ]
    # NOTE(review): the pattern ends with a single-character class (no
    # quantifier), so the captured URL extends exactly one character past
    # the base - confirm this is intended.
    _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL_BASE}[^\'"])']

    _TESTS = [{
        'url': 'https://www.art19.com/shows/5898c087-a14f-48dc-b6fc-a2280a1ff6e0/',
        'info_dict': {
            '_type': 'playlist',
            'id': '5898c087-a14f-48dc-b6fc-a2280a1ff6e0',
            'display_id': 'echt-gebeurd',
            'title': 'Echt Gebeurd',
            'description': 'md5:5fd11dc80b76e51ffd34b6067fd5e560',
            'timestamp': 1492642167,
            'upload_date': '20170419',
            'modified_timestamp': int,
            'modified_date': str,
            'tags': 'count:7',
        },
        'playlist_mincount': 425,
    }, {
        'url': 'https://www.art19.com/shows/echt-gebeurd',
        'info_dict': {
            '_type': 'playlist',
            'id': '5898c087-a14f-48dc-b6fc-a2280a1ff6e0',
            'display_id': 'echt-gebeurd',
            'title': 'Echt Gebeurd',
            'description': 'md5:5fd11dc80b76e51ffd34b6067fd5e560',
            'timestamp': 1492642167,
            'upload_date': '20170419',
            'modified_timestamp': int,
            'modified_date': str,
            'tags': 'count:7',
        },
        'playlist_mincount': 425,
    }, {
        'url': 'https://rss.art19.com/scamfluencers',
        'info_dict': {
            '_type': 'playlist',
            'id': 'd3c9b8ca-26b3-42f4-9bd8-21d1a9031e75',
            'display_id': 'scamfluencers',
            'title': 'Scamfluencers',
            'description': 'md5:7d239d670c0ced6dadbf71c4caf764b7',
            'timestamp': 1647368573,
            'upload_date': '20220315',
            'modified_timestamp': int,
            'modified_date': str,
            'tags': [],
        },
        'playlist_mincount': 90,
    }, {
        'url': 'https://art19.com/shows/enthuellt/embed',
        'info_dict': {
            '_type': 'playlist',
            'id': 'e2cacf57-bb8a-4263-aa81-719bcdd4f80c',
            'display_id': 'enthuellt',
            'title': 'Enthüllt',
            'description': 'md5:17752246643414a2fd51744fc9a1c08e',
            'timestamp': 1601645860,
            'upload_date': '20201002',
            'modified_timestamp': int,
            'modified_date': str,
            'tags': 'count:10',
        },
        'playlist_mincount': 10,
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://deconstructingyourself.com/deconstructing-yourself-podcast',
        'info_dict': {
            '_type': 'playlist',
            'id': 'cfbb9b01-c295-4adb-8726-adde7c03cf21',
            'display_id': 'deconstructing-yourself',
            'title': 'Deconstructing Yourself',
            'description': 'md5:dab5082b28b248a35476abf64768854d',
            'timestamp': 1570581181,
            'upload_date': '20191009',
            'modified_timestamp': int,
            'modified_date': str,
            'tags': 'count:5',
        },
        'playlist_mincount': 80,
    }, {
        'url': 'https://chicagoreader.com/columns-opinion/podcasts/ben-joravsky-show-podcast-episodes/',
        'info_dict': {
            '_type': 'playlist',
            'id': '9dfa2c37-ab87-4c13-8388-4897914313ec',
            'display_id': 'the-ben-joravsky-show',
            'title': 'The Ben Joravsky Show',
            'description': 'md5:c0f3ec0ee0dbea764390e521adc8780a',
            'timestamp': 1550875095,
            'upload_date': '20190222',
            'modified_timestamp': int,
            'modified_date': str,
            'tags': ['Chicago Politics', 'chicago', 'Ben Joravsky'],
        },
        'playlist_mincount': 1900,
    }]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield embedded show URLs found in ``webpage``.

        The inherited results come first; after that, every
        ``art19-web-player`` <div> that carries a ``data-series-id``
        attribute is turned into an art19.com show URL.
        """
        yield from super()._extract_embed_urls(url, webpage)

        player_div_re = r'<div[^>]+\bclass=[\'"][^\'"]*art19-web-player[^\'"]*[\'"][^>]+\bdata-series-id=[\'"]([\w-]+)[\'"]'
        for mobj in re.finditer(player_div_re, webpage):
            yield f'https://art19.com/shows/{mobj.group(1)}'

    def _real_extract(self, url):
        """Return a playlist whose entries are the show's episodes.

        The series metadata is requested from art19.com (fatally); each
        listed episode id becomes a url_result pointing at its
        rss.art19.com .mp3 URL and delegated to Art19IE.
        """
        series_id = self._match_id(url)
        series_metadata = self._download_json(
            f'https://art19.com/series/{series_id}', series_id, note='Downloading series metadata',
            headers={'Accept': 'application/vnd.art19.v0+json'})

        episode_ids = traverse_obj(series_metadata, ('series', 'episode_ids', ..., {str}))
        playlist = {
            '_type': 'playlist',
            'entries': [
                self.url_result(f'https://rss.art19.com/episodes/{episode_id}.mp3', Art19IE)
                for episode_id in episode_ids
            ],
        }
        playlist.update(traverse_obj(series_metadata, ('series', {
            'id': ('id', {str}),
            'display_id': ('slug', {str}),
            'title': ('title', {str}),
            'description': ('description_plain', {str}),
            'timestamp': ('created_at', {parse_iso8601}),
            'modified_timestamp': ('updated_at', {parse_iso8601}),
        })))
        # Tags are read from the top level of the response, not from 'series'
        playlist['tags'] = traverse_obj(series_metadata, ('tags', ..., 'name', {str}))
        return playlist
|
||||
@@ -7,6 +7,7 @@ import math
|
||||
import re
|
||||
import time
|
||||
import urllib.parse
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor, SearchInfoExtractor
|
||||
from ..dependencies import Cryptodome
|
||||
@@ -1304,6 +1305,26 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
|
||||
'upload_date': '20211127',
|
||||
},
|
||||
'playlist_mincount': 513,
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/list/1958703906?sid=547718&oid=687146339&bvid=BV1DU4y1r7tz',
|
||||
'info_dict': {
|
||||
'id': 'BV1DU4y1r7tz',
|
||||
'ext': 'mp4',
|
||||
'title': '【直播回放】8.20晚9:30 3d发布喵 2022年8月20日21点场',
|
||||
'upload_date': '20220820',
|
||||
'description': '',
|
||||
'timestamp': 1661016330,
|
||||
'uploader_id': '1958703906',
|
||||
'uploader': '靡烟miya',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'duration': 9552.903,
|
||||
'tags': list,
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'_old_archive_ids': ['bilibili 687146339_part1'],
|
||||
},
|
||||
'params': {'noplaylist': True},
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
|
||||
'info_dict': {
|
||||
@@ -1355,6 +1376,11 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
list_id = self._match_id(url)
|
||||
|
||||
bvid = traverse_obj(parse_qs(url), ('bvid', 0))
|
||||
if not self._yes_playlist(list_id, bvid):
|
||||
return self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE)
|
||||
|
||||
webpage = self._download_webpage(url, list_id)
|
||||
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)
|
||||
if traverse_obj(initial_state, ('error', 'code', {int_or_none})) != 200:
|
||||
@@ -1464,8 +1490,37 @@ class BiliBiliSearchIE(SearchInfoExtractor):
|
||||
IE_DESC = 'Bilibili video search'
|
||||
_MAX_RESULTS = 100000
|
||||
_SEARCH_KEY = 'bilisearch'
|
||||
_TESTS = [{
|
||||
'url': 'bilisearch3:靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
|
||||
'playlist_count': 3,
|
||||
'info_dict': {
|
||||
'id': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
|
||||
'title': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'BV1n44y1Q7sc',
|
||||
'ext': 'mp4',
|
||||
'title': '“出道一年,我怎么还在等你单推的女人睡觉后开播啊?”【一分钟了解靡烟miya】',
|
||||
'timestamp': 1669889987,
|
||||
'upload_date': '20221201',
|
||||
'description': 'md5:43343c0973defff527b5a4b403b4abf9',
|
||||
'tags': list,
|
||||
'uploader': '靡烟miya',
|
||||
'duration': 123.156,
|
||||
'uploader_id': '1958703906',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'_old_archive_ids': ['bilibili 988222410_part1'],
|
||||
},
|
||||
}],
|
||||
}]
|
||||
|
||||
def _search_results(self, query):
|
||||
if not self._get_cookies('https://api.bilibili.com').get('buvid3'):
|
||||
self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc')
|
||||
for page_num in itertools.count(1):
|
||||
videos = self._download_json(
|
||||
'https://api.bilibili.com/x/web-interface/search/type', query,
|
||||
@@ -1941,7 +1996,7 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||
'title': get_element_by_class(
|
||||
'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
|
||||
'description': get_element_by_class(
|
||||
'bstar-meta__desc', webpage) or self._html_search_meta('og:description'),
|
||||
'bstar-meta__desc', webpage) or self._html_search_meta('og:description', webpage),
|
||||
}, self._search_json_ld(webpage, video_id, default={}))
|
||||
|
||||
def _get_comments_reply(self, root_id, next_id=0, display_id=None):
|
||||
|
||||
209
yt_dlp/extractor/boosty.py
Normal file
209
yt_dlp/extractor/boosty.py
Normal file
@@ -0,0 +1,209 @@
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
qualities,
|
||||
str_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class BoostyIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?boosty\.to/(?P<user>[^/#?]+)/posts/(?P<post_id>[^/#?]+)'
|
||||
_TESTS = [{
|
||||
# single ok_video
|
||||
'url': 'https://boosty.to/kuplinov/posts/e55d050c-e3bb-4873-a7db-ac7a49b40c38',
|
||||
'info_dict': {
|
||||
'id': 'd7473824-352e-48e2-ae53-d4aa39459968',
|
||||
'title': 'phasma_3',
|
||||
'channel': 'Kuplinov',
|
||||
'channel_id': '7958701',
|
||||
'timestamp': 1655031975,
|
||||
'upload_date': '20220612',
|
||||
'release_timestamp': 1655049000,
|
||||
'release_date': '20220612',
|
||||
'modified_timestamp': 1668680993,
|
||||
'modified_date': '20221117',
|
||||
'tags': ['куплинов', 'phasmophobia'],
|
||||
'like_count': int,
|
||||
'ext': 'mp4',
|
||||
'duration': 105,
|
||||
'view_count': int,
|
||||
'thumbnail': r're:^https://i\.mycdn\.me/videoPreview\?',
|
||||
},
|
||||
}, {
|
||||
# multiple ok_video
|
||||
'url': 'https://boosty.to/maddyson/posts/0c652798-3b35-471f-8b48-a76a0b28736f',
|
||||
'info_dict': {
|
||||
'id': '0c652798-3b35-471f-8b48-a76a0b28736f',
|
||||
'title': 'то что не пропустил юта6',
|
||||
'channel': 'Илья Давыдов',
|
||||
'channel_id': '6808257',
|
||||
'timestamp': 1694017040,
|
||||
'upload_date': '20230906',
|
||||
'release_timestamp': 1694017040,
|
||||
'release_date': '20230906',
|
||||
'modified_timestamp': 1694071178,
|
||||
'modified_date': '20230907',
|
||||
'like_count': int,
|
||||
},
|
||||
'playlist_count': 3,
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'cc325a9f-a563-41c6-bf47-516c1b506c9a',
|
||||
'title': 'то что не пропустил юта6',
|
||||
'channel': 'Илья Давыдов',
|
||||
'channel_id': '6808257',
|
||||
'timestamp': 1694017040,
|
||||
'upload_date': '20230906',
|
||||
'release_timestamp': 1694017040,
|
||||
'release_date': '20230906',
|
||||
'modified_timestamp': 1694071178,
|
||||
'modified_date': '20230907',
|
||||
'like_count': int,
|
||||
'ext': 'mp4',
|
||||
'duration': 31204,
|
||||
'view_count': int,
|
||||
'thumbnail': r're:^https://i\.mycdn\.me/videoPreview\?',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': 'd07b0a72-9493-4512-b54e-55ce468fd4b7',
|
||||
'title': 'то что не пропустил юта6',
|
||||
'channel': 'Илья Давыдов',
|
||||
'channel_id': '6808257',
|
||||
'timestamp': 1694017040,
|
||||
'upload_date': '20230906',
|
||||
'release_timestamp': 1694017040,
|
||||
'release_date': '20230906',
|
||||
'modified_timestamp': 1694071178,
|
||||
'modified_date': '20230907',
|
||||
'like_count': int,
|
||||
'ext': 'mp4',
|
||||
'duration': 25704,
|
||||
'view_count': int,
|
||||
'thumbnail': r're:^https://i\.mycdn\.me/videoPreview\?',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': '4a3bba32-78c8-422a-9432-2791aff60b42',
|
||||
'title': 'то что не пропустил юта6',
|
||||
'channel': 'Илья Давыдов',
|
||||
'channel_id': '6808257',
|
||||
'timestamp': 1694017040,
|
||||
'upload_date': '20230906',
|
||||
'release_timestamp': 1694017040,
|
||||
'release_date': '20230906',
|
||||
'modified_timestamp': 1694071178,
|
||||
'modified_date': '20230907',
|
||||
'like_count': int,
|
||||
'ext': 'mp4',
|
||||
'duration': 31867,
|
||||
'view_count': int,
|
||||
'thumbnail': r're:^https://i\.mycdn\.me/videoPreview\?',
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
# single external video (youtube)
|
||||
'url': 'https://boosty.to/denischuzhoy/posts/6094a487-bcec-4cf8-a453-43313b463c38',
|
||||
'info_dict': {
|
||||
'id': 'EXelTnve5lY',
|
||||
'title': 'Послание Президента Федеральному Собранию | Класс народа',
|
||||
'upload_date': '20210425',
|
||||
'channel': 'Денис Чужой',
|
||||
'tags': 'count:10',
|
||||
'like_count': int,
|
||||
'ext': 'mp4',
|
||||
'duration': 816,
|
||||
'view_count': int,
|
||||
'thumbnail': r're:^https://i\.ytimg\.com/',
|
||||
'age_limit': 0,
|
||||
'availability': 'public',
|
||||
'categories': list,
|
||||
'channel_follower_count': int,
|
||||
'channel_id': 'UCCzVNbWZfYpBfyofCCUD_0w',
|
||||
'channel_is_verified': bool,
|
||||
'channel_url': r're:^https://www\.youtube\.com/',
|
||||
'comment_count': int,
|
||||
'description': str,
|
||||
'heatmap': 'count:100',
|
||||
'live_status': str,
|
||||
'playable_in_embed': bool,
|
||||
'uploader': str,
|
||||
'uploader_id': str,
|
||||
'uploader_url': r're:^https://www\.youtube\.com/',
|
||||
},
|
||||
}]
|
||||
|
||||
_MP4_TYPES = ('tiny', 'lowest', 'low', 'medium', 'high', 'full_hd', 'quad_hd', 'ultra_hd')
|
||||
|
||||
def _extract_formats(self, player_urls, video_id):
    """
    Build the list of formats for one video from its player URL entries.

    Entries without a valid 'url' are filtered out up front. HLS and DASH
    entries are expanded through their manifests (non-fatally), entries whose
    type is listed in _MP4_TYPES become direct mp4 formats whose 'quality' is
    computed by qualities(self._MP4_TYPES), and any other type only produces
    a warning.
    """
    hls_types = ('hls', 'hls_live', 'live_ondemand_hls', 'live_playback_hls')
    dash_types = ('dash', 'dash_live', 'live_playback_dash')
    rank_of = qualities(self._MP4_TYPES)

    collected = []
    for entry in traverse_obj(player_urls, lambda _, v: url_or_none(v['url'])):
        media_url, kind = entry['url'], entry.get('type')

        if kind in hls_types:
            collected.extend(self._extract_m3u8_formats(
                media_url, video_id, m3u8_id='hls', fatal=False))
            continue

        if kind in dash_types:
            collected.extend(self._extract_mpd_formats(
                media_url, video_id, mpd_id='dash', fatal=False))
            continue

        if kind not in self._MP4_TYPES:
            # Neither a manifest nor a known progressive quality label
            self.report_warning(f'Unknown format type: {kind!r}')
            continue

        collected.append({
            'url': media_url,
            'ext': 'mp4',
            'format_id': kind,
            'quality': rank_of(kind),
        })

    return collected
|
||||
|
||||
def _real_extract(self, url):
    """
    Extract the video(s) attached to a Boosty post.

    The post is fetched from the api.boosty.to endpoint; if it carries no
    title, the post's web page is downloaded and its title is used instead.
    Items of type 'video' with a valid URL are handed to YoutubeIE, items of
    type 'ok_video' are extracted here. A single resulting entry is returned
    as-is, several entries are wrapped in a playlist.

    Raises ExtractorError (expected) when no video item was found.
    """
    user, post_id = self._match_valid_url(url).group('user', 'post_id')
    api_url = f'https://api.boosty.to/v1/blog/{user}/post/{post_id}'
    post = self._download_json(
        api_url, post_id,
        note='Downloading post data', errnote='Unable to download post data')

    post_title = post.get('title')
    if not post_title:
        self.report_warning('Unable to extract post title. Falling back to parsing html page')
        webpage = self._download_webpage(url, video_id=post_id)
        og_title = self._og_search_title(webpage, default=None)
        post_title = og_title or self._html_extract_title(webpage)

    # Post-level fields shared by every entry (and by the playlist itself)
    post_fields = traverse_obj(post, {
        'channel': ('user', 'name', {str}),
        'channel_id': ('user', 'id', {str_or_none}),
        'timestamp': ('createdAt', {int_or_none}),
        'release_timestamp': ('publishTime', {int_or_none}),
        'modified_timestamp': ('updatedAt', {int_or_none}),
        'tags': ('tags', ..., 'title', {str}),
        'like_count': ('count', 'likes', {int_or_none}),
    })
    common_metadata = {'title': post_title, **post_fields}

    entries = []
    for item in traverse_obj(post, ('data', ..., {dict})):
        kind = item.get('type')

        if kind == 'video' and url_or_none(item.get('url')):
            entries.append(self.url_result(item['url'], YoutubeIE))
            continue
        if kind != 'ok_video':
            continue

        video_id = item.get('id') or post_id
        formats = self._extract_formats(item.get('playerUrls'), video_id)
        # Item-level fields override the post-level ones (notably 'title')
        item_fields = traverse_obj(item, {
            'title': ('title', {str}),
            'duration': ('duration', {int_or_none}),
            'view_count': ('viewsCounter', {int_or_none}),
            'thumbnail': (('previewUrl', 'defaultPreview'), {url_or_none}),
        }, get_all=False)
        entries.append({
            'id': video_id,
            'formats': formats,
            **common_metadata,
            **item_fields,
        })

    if not entries:
        raise ExtractorError('No videos found', expected=True)
    if len(entries) > 1:
        return self.playlist_result(entries, post_id, post_title, **common_metadata)
    return entries[0]
|
||||
@@ -1,6 +1,7 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_resolution,
|
||||
@@ -60,6 +61,7 @@ class CCMAIE(InfoExtractor):
|
||||
'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
|
||||
'media': media_type,
|
||||
'idint': media_id,
|
||||
'format': 'dm',
|
||||
})
|
||||
|
||||
formats = []
|
||||
@@ -69,6 +71,10 @@ class CCMAIE(InfoExtractor):
|
||||
format_url = url_or_none(format_.get('file'))
|
||||
if not format_url:
|
||||
continue
|
||||
if determine_ext(format_url) == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, media_id, mpd_id='dash', fatal=False))
|
||||
continue
|
||||
label = format_.get('label')
|
||||
f = parse_resolution(label)
|
||||
f.update({
|
||||
|
||||
@@ -67,7 +67,10 @@ class CineverseIE(CineverseBaseIE):
|
||||
html = self._download_webpage(url, video_id)
|
||||
idetails = self._search_nextjs_data(html, video_id)['props']['pageProps']['idetails']
|
||||
|
||||
if idetails.get('err_code') == 1200:
|
||||
err_code = idetails.get('err_code')
|
||||
if err_code == 1002:
|
||||
self.raise_login_required()
|
||||
elif err_code == 1200:
|
||||
self.raise_geo_restricted(
|
||||
'This video is not available from your location due to geo restriction. '
|
||||
'You may be able to bypass it by using the /details/ page instead of the /watch/ page',
|
||||
|
||||
@@ -4,27 +4,25 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class CloudflareStreamIE(InfoExtractor):
|
||||
_SUBDOMAIN_RE = r'(?:(?:watch|iframe|customer-\w+)\.)?'
|
||||
_DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
|
||||
_EMBED_RE = r'embed\.%s/embed/[^/]+\.js\?.*?\bvideo=' % _DOMAIN_RE
|
||||
_EMBED_RE = rf'embed\.{_DOMAIN_RE}/embed/[^/]+\.js\?.*?\bvideo='
|
||||
_ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:watch\.)?%s/|
|
||||
%s
|
||||
)
|
||||
(?P<id>%s)
|
||||
''' % (_DOMAIN_RE, _EMBED_RE, _ID_RE)
|
||||
_EMBED_REGEX = [fr'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1']
|
||||
_VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P<id>{_ID_RE})'
|
||||
_EMBED_REGEX = [
|
||||
rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1',
|
||||
rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})',
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717',
|
||||
'info_dict': {
|
||||
'id': '31c9291ab41fac05471db4e73aa11717',
|
||||
'ext': 'mp4',
|
||||
'title': '31c9291ab41fac05471db4e73aa11717',
|
||||
'thumbnail': 'https://videodelivery.net/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
|
||||
@@ -35,6 +33,21 @@ class CloudflareStreamIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://embed.videodelivery.net/embed/r4xu.fla9.latest.js?video=81d80727f3022488598f68d323c1ad5e',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://customer-aw5py76sw8wyqzmh.cloudflarestream.com/2463f6d3e06fa29710a337f5f5389fd8/iframe',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://upride.cc/incident/shoulder-pass-at-light/',
|
||||
'info_dict': {
|
||||
'id': 'eaef9dea5159cf968be84241b5cedfe7',
|
||||
'ext': 'mp4',
|
||||
'title': 'eaef9dea5159cf968be84241b5cedfe7',
|
||||
'thumbnail': 'https://videodelivery.net/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
79
yt_dlp/extractor/cloudycdn.py
Normal file
79
yt_dlp/extractor/cloudycdn.py
Normal file
@@ -0,0 +1,79 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class CloudyCDNIE(InfoExtractor):
    # Matches embed.cloudycdn.services player pages, with or without an explicit scheme
    _VALID_URL = r'(?:https?:)?//embed\.cloudycdn\.services/(?P<site_id>[^/?#]+)/media/(?P<id>[\w-]+)'
    # Finds the same player URL inside an <iframe src=...> on third-party pages
    _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
    _TESTS = [{
        'url': 'https://embed.cloudycdn.services/ltv/media/46k_d23-6000-105?',
        'md5': '64f72a360ca530d5ed89c77646c9eee5',
        'info_dict': {
            'id': '46k_d23-6000-105',
            'ext': 'mp4',
            'timestamp': 1700589151,
            'duration': 1442,
            'upload_date': '20231121',
            'title': 'D23-6000-105_cetstud',
            'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/660858/placeholder1700589200.jpg',
        },
    }, {
        'url': 'https://embed.cloudycdn.services/izm/media/26e_lv-8-5-1',
        'md5': '798828a479151e2444d8dcfbec76e482',
        'info_dict': {
            'id': '26e_lv-8-5-1',
            'ext': 'mp4',
            'title': 'LV-8-5-1',
            'timestamp': 1669767167,
            'thumbnail': 'https://store.cloudycdn.services/tmsp00120/assets/media/488306/placeholder1679423604.jpg',
            'duration': 1205,
            'upload_date': '20221130',
        },
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://www.tavaklase.lv/video/es-esmu-mina-um-2/',
        'md5': '63074e8e6c84ac2a01f2fb8bf03b8f43',
        'info_dict': {
            'id': 'cqd_lib-2',
            'ext': 'mp4',
            'upload_date': '20230223',
            'duration': 629,
            'thumbnail': 'https://store.cloudycdn.services/tmsp00120/assets/media/518407/placeholder1678748124.jpg',
            'timestamp': 1677181513,
            'title': 'LIB-2',
        },
    }]

    def _real_extract(self, url):
        """
        Fetch the player JSON for the matched site/media pair and turn it
        into an info dict: one m3u8 manifest per listed source (failures are
        non-fatal), plus title, duration, timestamp and thumbnail.
        """
        site_id, video_id = self._match_valid_url(url).group('site_id', 'id')

        player_api = f'https://player.cloudycdn.services/player/{site_id}/media/{video_id}/'
        # The player endpoint is queried with a form-encoded POST body
        payload = urlencode_postdata({
            'version': '6.4.0',
            'referer': url,
        })
        data = self._download_json(player_api, video_id, data=payload)

        formats, subtitles = [], {}
        source_urls = traverse_obj(data, ('source', 'sources', ..., 'src', {url_or_none}))
        for manifest_url in source_urls:
            found_formats, found_subs = self._extract_m3u8_formats_and_subtitles(
                manifest_url, video_id, fatal=False)
            formats.extend(found_formats)
            self._merge_subtitles(found_subs, target=subtitles)

        metadata = traverse_obj(data, {
            'title': ('name', {str}),
            'duration': ('duration', {int_or_none}),
            'timestamp': ('upload_date', {parse_iso8601}),
            'thumbnail': ('source', 'poster', {url_or_none}),
        })
        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            **metadata,
        }
|
||||
@@ -1,68 +1,97 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
|
||||
|
||||
class CNBCIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://video\.cnbc\.com/gallery/\?video=(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://video.cnbc.com/gallery/?video=3000503714',
|
||||
'info_dict': {
|
||||
'id': '3000503714',
|
||||
'ext': 'mp4',
|
||||
'title': 'Fighting zombies is big business',
|
||||
'description': 'md5:0c100d8e1a7947bd2feec9a5550e519e',
|
||||
'timestamp': 1459332000,
|
||||
'upload_date': '20160330',
|
||||
'uploader': 'NBCU-CNBC',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Dead link',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': smuggle_url(
|
||||
'http://link.theplatform.com/s/gZWlPC/media/guid/2408950221/%s?mbr=true&manifest=m3u' % video_id,
|
||||
{'force_smil_url': True}),
|
||||
'id': video_id,
|
||||
}
|
||||
from ..utils import int_or_none, parse_iso8601, str_or_none, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class CNBCVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?cnbc\.com(?P<path>/video/(?:[^/]+/)+(?P<id>[^./?#&]+)\.html)'
|
||||
_TEST = {
|
||||
'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
|
||||
_VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/?#]+/)+(?P<id>[^./?#&]+)\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cnbc.com/video/2023/12/07/mcdonalds-just-unveiled-cosmcsits-new-spinoff-brand.html',
|
||||
'info_dict': {
|
||||
'id': '7000031301',
|
||||
'ext': 'mp4',
|
||||
'title': "Trump: I don't necessarily agree with raising rates",
|
||||
'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3',
|
||||
'timestamp': 1531958400,
|
||||
'upload_date': '20180719',
|
||||
'uploader': 'NBCU-CNBC',
|
||||
'id': '107344774',
|
||||
'display_id': 'mcdonalds-just-unveiled-cosmcsits-new-spinoff-brand',
|
||||
'modified_timestamp': 1702053483,
|
||||
'timestamp': 1701977810,
|
||||
'channel': 'News Videos',
|
||||
'upload_date': '20231207',
|
||||
'description': 'md5:882c001d85cb43d7579b514307b3e78b',
|
||||
'release_timestamp': 1701977375,
|
||||
'modified_date': '20231208',
|
||||
'release_date': '20231207',
|
||||
'duration': 65,
|
||||
'author': 'Sean Conlon',
|
||||
'title': 'Here\'s a first look at McDonald\'s new spinoff brand, CosMc\'s',
|
||||
'thumbnail': 'https://image.cnbcfm.com/api/v1/image/107344192-1701894812493-CosMcsskyHero_2336x1040_hero-desktop.jpg?v=1701894855',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
'url': 'https://www.cnbc.com/video/2023/12/08/jim-cramer-shares-his-take-on-seattles-tech-scene.html',
|
||||
'info_dict': {
|
||||
'author': 'Jim Cramer',
|
||||
'channel': 'Mad Money with Jim Cramer',
|
||||
'description': 'md5:72925be21b952e95eba51178dddf4e3e',
|
||||
'duration': 299.0,
|
||||
'ext': 'mp4',
|
||||
'id': '107345451',
|
||||
'display_id': 'jim-cramer-shares-his-take-on-seattles-tech-scene',
|
||||
'thumbnail': 'https://image.cnbcfm.com/api/v1/image/107345481-1702079431MM-B-120823.jpg?v=1702079430',
|
||||
'timestamp': 1702080139,
|
||||
'title': 'Jim Cramer shares his take on Seattle\'s tech scene',
|
||||
'release_date': '20231208',
|
||||
'upload_date': '20231209',
|
||||
'modified_timestamp': 1702080139,
|
||||
'modified_date': '20231209',
|
||||
'release_timestamp': 1702073551,
|
||||
},
|
||||
'skip': 'Dead link',
|
||||
}
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
'url': 'https://www.cnbc.com/video/2023/12/08/the-epicenter-of-ai-is-in-seattle-says-jim-cramer.html',
|
||||
'info_dict': {
|
||||
'author': 'Jim Cramer',
|
||||
'channel': 'Mad Money with Jim Cramer',
|
||||
'description': 'md5:72925be21b952e95eba51178dddf4e3e',
|
||||
'duration': 113.0,
|
||||
'ext': 'mp4',
|
||||
'id': '107345474',
|
||||
'display_id': 'the-epicenter-of-ai-is-in-seattle-says-jim-cramer',
|
||||
'thumbnail': 'https://image.cnbcfm.com/api/v1/image/107345486-Screenshot_2023-12-08_at_70339_PM.png?v=1702080248',
|
||||
'timestamp': 1702080535,
|
||||
'title': 'The epicenter of AI is in Seattle, says Jim Cramer',
|
||||
'release_timestamp': 1702077347,
|
||||
'modified_timestamp': 1702080535,
|
||||
'release_date': '20231208',
|
||||
'upload_date': '20231209',
|
||||
'modified_date': '20231209',
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
path, display_id = self._match_valid_url(url).groups()
|
||||
video_id = self._download_json(
|
||||
'https://webql-redesign.cnbcfm.com/graphql', display_id, query={
|
||||
'query': '''{
|
||||
page(path: "%s") {
|
||||
vcpsId
|
||||
}
|
||||
}''' % path,
|
||||
})['data']['page']['vcpsId']
|
||||
return self.url_result(
|
||||
'http://video.cnbc.com/gallery/?video=%d' % video_id,
|
||||
CNBCIE.ie_key())
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
data = self._search_json(r'window\.__s_data=', webpage, 'video data', display_id)
|
||||
|
||||
player_data = traverse_obj(data, (
|
||||
'page', 'page', 'layout', ..., 'columns', ..., 'modules',
|
||||
lambda _, v: v['name'] == 'clipPlayer', 'data', {dict}), get_all=False)
|
||||
|
||||
return {
|
||||
'id': display_id,
|
||||
'display_id': display_id,
|
||||
'formats': self._extract_akamai_formats(player_data['playbackURL'], display_id),
|
||||
**self._search_json_ld(webpage, display_id, fatal=False),
|
||||
**traverse_obj(player_data, {
|
||||
'id': ('id', {str_or_none}),
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'author': ('author', ..., 'name', {str}),
|
||||
'timestamp': ('datePublished', {parse_iso8601}),
|
||||
'release_timestamp': ('uploadDate', {parse_iso8601}),
|
||||
'modified_timestamp': ('dateLastPublished', {parse_iso8601}),
|
||||
'thumbnail': ('thumbnail', {url_or_none}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'channel': ('section', 'title', {str}),
|
||||
}, get_all=False),
|
||||
}
|
||||
|
||||
@@ -247,6 +247,8 @@ class InfoExtractor:
|
||||
(For internal use only)
|
||||
* http_chunk_size Chunk size for HTTP downloads
|
||||
* ffmpeg_args Extra arguments for ffmpeg downloader
|
||||
* is_dash_periods Whether the format is a result of merging
|
||||
multiple DASH periods.
|
||||
RTMP formats can also have the additional fields: page_url,
|
||||
app, play_path, tc_url, flash_version, rtmp_live, rtmp_conn,
|
||||
rtmp_protocol, rtmp_real_time
|
||||
@@ -278,7 +280,7 @@ class InfoExtractor:
|
||||
description: Full video description.
|
||||
uploader: Full name of the video uploader.
|
||||
license: License name the video is licensed under.
|
||||
creator: The creator of the video.
|
||||
creators: List of creators of the video.
|
||||
timestamp: UNIX timestamp of the moment the video was uploaded
|
||||
upload_date: Video upload date in UTC (YYYYMMDD).
|
||||
If not explicitly set, calculated from timestamp
|
||||
@@ -422,16 +424,16 @@ class InfoExtractor:
|
||||
track_number: Number of the track within an album or a disc, as an integer.
|
||||
track_id: Id of the track (useful in case of custom indexing, e.g. 6.iii),
|
||||
as a unicode string.
|
||||
artist: Artist(s) of the track.
|
||||
genre: Genre(s) of the track.
|
||||
artists: List of artists of the track.
|
||||
composers: List of composers of the piece.
|
||||
genres: List of genres of the track.
|
||||
album: Title of the album the track belongs to.
|
||||
album_type: Type of the album (e.g. "Demo", "Full-length", "Split", "Compilation", etc).
|
||||
album_artist: List of all artists appeared on the album (e.g.
|
||||
"Ash Borer / Fell Voices" or "Various Artists", useful for splits
|
||||
and compilations).
|
||||
album_artists: List of all artists that appeared on the album.
|
||||
E.g. ["Ash Borer", "Fell Voices"] or ["Various Artists"].
|
||||
Useful for splits and compilations.
|
||||
disc_number: Number of the disc or other physical medium the track belongs to,
|
||||
as an integer.
|
||||
composer: Composer of the piece
|
||||
|
||||
The following fields should only be set for clips that should be cut from the original video:
|
||||
|
||||
@@ -442,6 +444,18 @@ class InfoExtractor:
|
||||
rows: Number of rows in each storyboard fragment, as an integer
|
||||
columns: Number of columns in each storyboard fragment, as an integer
|
||||
|
||||
The following fields are deprecated and should not be set by new code:
|
||||
composer: Use "composers" instead.
|
||||
Composer(s) of the piece, comma-separated.
|
||||
artist: Use "artists" instead.
|
||||
Artist(s) of the track, comma-separated.
|
||||
genre: Use "genres" instead.
|
||||
Genre(s) of the track, comma-separated.
|
||||
album_artist: Use "album_artists" instead.
|
||||
All artists that appeared on the album, comma-separated.
|
||||
creator: Use "creators" instead.
|
||||
The creator of the video.
|
||||
|
||||
Unless mentioned otherwise, the fields should be Unicode strings.
|
||||
|
||||
Unless mentioned otherwise, None is equivalent to absence of information.
|
||||
@@ -2530,7 +2544,11 @@ class InfoExtractor:
|
||||
self._report_ignoring_subs('DASH')
|
||||
return fmts
|
||||
|
||||
def _extract_mpd_formats_and_subtitles(
|
||||
def _extract_mpd_formats_and_subtitles(self, *args, **kwargs):
|
||||
periods = self._extract_mpd_periods(*args, **kwargs)
|
||||
return self._merge_mpd_periods(periods)
|
||||
|
||||
def _extract_mpd_periods(
|
||||
self, mpd_url, video_id, mpd_id=None, note=None, errnote=None,
|
||||
fatal=True, data=None, headers={}, query={}):
|
||||
|
||||
@@ -2543,17 +2561,16 @@ class InfoExtractor:
|
||||
errnote='Failed to download MPD manifest' if errnote is None else errnote,
|
||||
fatal=fatal, data=data, headers=headers, query=query)
|
||||
if res is False:
|
||||
return [], {}
|
||||
return []
|
||||
mpd_doc, urlh = res
|
||||
if mpd_doc is None:
|
||||
return [], {}
|
||||
return []
|
||||
|
||||
# We could have been redirected to a new url when we retrieved our mpd file.
|
||||
mpd_url = urlh.url
|
||||
mpd_base_url = base_url(mpd_url)
|
||||
|
||||
return self._parse_mpd_formats_and_subtitles(
|
||||
mpd_doc, mpd_id, mpd_base_url, mpd_url)
|
||||
return self._parse_mpd_periods(mpd_doc, mpd_id, mpd_base_url, mpd_url)
|
||||
|
||||
def _parse_mpd_formats(self, *args, **kwargs):
|
||||
fmts, subs = self._parse_mpd_formats_and_subtitles(*args, **kwargs)
|
||||
@@ -2561,8 +2578,39 @@ class InfoExtractor:
|
||||
self._report_ignoring_subs('DASH')
|
||||
return fmts
|
||||
|
||||
def _parse_mpd_formats_and_subtitles(
|
||||
self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
|
||||
def _parse_mpd_formats_and_subtitles(self, *args, **kwargs):
|
||||
periods = self._parse_mpd_periods(*args, **kwargs)
|
||||
return self._merge_mpd_periods(periods)
|
||||
|
||||
def _merge_mpd_periods(self, periods):
|
||||
"""
|
||||
Combine all formats and subtitles from an MPD manifest into a single list,
|
||||
by concatenate streams with similar formats.
|
||||
"""
|
||||
formats, subtitles = {}, {}
|
||||
for period in periods:
|
||||
for f in period['formats']:
|
||||
assert 'is_dash_periods' not in f, 'format already processed'
|
||||
f['is_dash_periods'] = True
|
||||
format_key = tuple(v for k, v in f.items() if k not in (
|
||||
('format_id', 'fragments', 'manifest_stream_number')))
|
||||
if format_key not in formats:
|
||||
formats[format_key] = f
|
||||
elif 'fragments' in f:
|
||||
formats[format_key].setdefault('fragments', []).extend(f['fragments'])
|
||||
|
||||
if subtitles and period['subtitles']:
|
||||
self.report_warning(bug_reports_message(
|
||||
'Found subtitles in multiple periods in the DASH manifest; '
|
||||
'if part of the subtitles are missing,'
|
||||
), only_once=True)
|
||||
|
||||
for sub_lang, sub_info in period['subtitles'].items():
|
||||
subtitles.setdefault(sub_lang, []).extend(sub_info)
|
||||
|
||||
return list(formats.values()), subtitles
|
||||
|
||||
def _parse_mpd_periods(self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
|
||||
"""
|
||||
Parse formats from MPD manifest.
|
||||
References:
|
||||
@@ -2643,14 +2691,17 @@ class InfoExtractor:
|
||||
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
|
||||
availability_start_time = unified_timestamp(
|
||||
mpd_doc.get('availabilityStartTime'), with_milliseconds=True) or 0
|
||||
formats, subtitles = [], {}
|
||||
stream_numbers = collections.defaultdict(int)
|
||||
for period in mpd_doc.findall(_add_ns('Period')):
|
||||
for period_idx, period in enumerate(mpd_doc.findall(_add_ns('Period'))):
|
||||
# segmentIngestTime is completely out of spec, but YT Livestream do this
|
||||
segment_ingest_time = period.get('{http://youtube.com/yt/2012/10/10}segmentIngestTime')
|
||||
if segment_ingest_time:
|
||||
availability_start_time = unified_timestamp(segment_ingest_time, with_milliseconds=True)
|
||||
|
||||
period_entry = {
|
||||
'id': period.get('id', f'period-{period_idx}'),
|
||||
'formats': [],
|
||||
'subtitles': collections.defaultdict(list),
|
||||
}
|
||||
period_duration = parse_duration(period.get('duration')) or mpd_duration
|
||||
period_ms_info = extract_multisegment_info(period, {
|
||||
'start_number': 1,
|
||||
@@ -2908,11 +2959,10 @@ class InfoExtractor:
|
||||
if content_type in ('video', 'audio', 'image/jpeg'):
|
||||
f['manifest_stream_number'] = stream_numbers[f['url']]
|
||||
stream_numbers[f['url']] += 1
|
||||
formats.append(f)
|
||||
period_entry['formats'].append(f)
|
||||
elif content_type == 'text':
|
||||
subtitles.setdefault(lang or 'und', []).append(f)
|
||||
|
||||
return formats, subtitles
|
||||
period_entry['subtitles'][lang or 'und'].append(f)
|
||||
yield period_entry
|
||||
|
||||
def _extract_ism_formats(self, *args, **kwargs):
|
||||
fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs)
|
||||
|
||||
@@ -33,10 +33,7 @@ class CrooksAndLiarsIE(InfoExtractor):
|
||||
webpage = self._download_webpage(
|
||||
'http://embed.crooksandliars.com/embed/%s' % video_id, video_id)
|
||||
|
||||
manifest = self._parse_json(
|
||||
self._search_regex(
|
||||
r'var\s+manifest\s*=\s*({.+?})\n', webpage, 'manifest JSON'),
|
||||
video_id)
|
||||
manifest = self._search_json(r'var\s+manifest\s*=', webpage, 'manifest JSON', video_id)
|
||||
|
||||
quality = qualities(('webm_low', 'mp4_low', 'webm_high', 'mp4_high'))
|
||||
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
encode_base_n,
|
||||
ExtractorError,
|
||||
encode_base_n,
|
||||
get_elements_by_class,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
merge_dicts,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
@@ -81,6 +83,7 @@ class EpornerIE(InfoExtractor):
|
||||
sources = video['sources']
|
||||
|
||||
formats = []
|
||||
has_av1 = bool(get_elements_by_class('download-av1', webpage))
|
||||
for kind, formats_dict in sources.items():
|
||||
if not isinstance(formats_dict, dict):
|
||||
continue
|
||||
@@ -106,6 +109,14 @@ class EpornerIE(InfoExtractor):
|
||||
'height': height,
|
||||
'fps': fps,
|
||||
})
|
||||
if has_av1:
|
||||
formats.append({
|
||||
'url': src.replace('.mp4', '-av1.mp4'),
|
||||
'format_id': join_nonempty('av1', format_id),
|
||||
'height': height,
|
||||
'fps': fps,
|
||||
'vcodec': 'av1',
|
||||
})
|
||||
|
||||
json_ld = self._search_json_ld(webpage, display_id, default={})
|
||||
|
||||
|
||||
224
yt_dlp/extractor/err.py
Normal file
224
yt_dlp/extractor/err.py
Normal file
@@ -0,0 +1,224 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ERRJupiterIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:jupiter(?:pluss)?|lasteekraan)\.err\.ee/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'note': 'Jupiter: Movie: siin-me-oleme',
|
||||
'url': 'https://jupiter.err.ee/1211107/siin-me-oleme',
|
||||
'md5': '9b45d1682a98853acaa1e1b0c791f425',
|
||||
'info_dict': {
|
||||
'id': '1211107',
|
||||
'ext': 'mp4',
|
||||
'title': 'Siin me oleme!',
|
||||
'alt_title': '',
|
||||
'description': 'md5:1825b795f5f7584241aeb59e5bbb4f70',
|
||||
'release_date': '20231226',
|
||||
'upload_date': '20201217',
|
||||
'modified_date': '20201217',
|
||||
'release_timestamp': 1703577600,
|
||||
'timestamp': 1608210000,
|
||||
'modified_timestamp': 1608220800,
|
||||
'release_year': 1978,
|
||||
},
|
||||
}, {
|
||||
'note': 'Jupiter: Series: Impulss',
|
||||
'url': 'https://jupiter.err.ee/1609145945/impulss',
|
||||
'md5': 'a378486df07ed1ba74e46cc861886243',
|
||||
'info_dict': {
|
||||
'id': '1609145945',
|
||||
'ext': 'mp4',
|
||||
'title': 'Impulss',
|
||||
'alt_title': 'Loteriipilet hooldekodusse',
|
||||
'description': 'md5:fa8a2ed0cdccb130211513443ee4d571',
|
||||
'release_date': '20231107',
|
||||
'upload_date': '20231026',
|
||||
'modified_date': '20231118',
|
||||
'release_timestamp': 1699380000,
|
||||
'timestamp': 1698327601,
|
||||
'modified_timestamp': 1700311802,
|
||||
'series': 'Impulss',
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'episode': 'Loteriipilet hooldekodusse',
|
||||
'episode_number': 6,
|
||||
'series_id': '1609108187',
|
||||
'release_year': 2023,
|
||||
'episode_id': '1609145945',
|
||||
},
|
||||
}, {
|
||||
'note': 'Jupiter: Radio Show: mnemoturniir episode',
|
||||
'url': 'https://jupiter.err.ee/1037919/mnemoturniir',
|
||||
'md5': 'f1eb95fe66f9620ff84e81bbac37076a',
|
||||
'info_dict': {
|
||||
'id': '1037919',
|
||||
'ext': 'm4a',
|
||||
'title': 'Mnemoturniir',
|
||||
'alt_title': '',
|
||||
'description': 'md5:626db52394e7583c26ab74d6a34d9982',
|
||||
'release_date': '20240121',
|
||||
'upload_date': '20240108',
|
||||
'modified_date': '20240121',
|
||||
'release_timestamp': 1705827900,
|
||||
'timestamp': 1704675602,
|
||||
'modified_timestamp': 1705827601,
|
||||
'series': 'Mnemoturniir',
|
||||
'season': 'Season 0',
|
||||
'season_number': 0,
|
||||
'episode': 'Episode 0',
|
||||
'episode_number': 0,
|
||||
'series_id': '1037919',
|
||||
'release_year': 2024,
|
||||
'episode_id': '1609215101',
|
||||
},
|
||||
}, {
|
||||
'note': 'Jupiter+: Clip: bolee-zelenyj-tallinn',
|
||||
'url': 'https://jupiterpluss.err.ee/1609180445/bolee-zelenyj-tallinn',
|
||||
'md5': '1b812270c4daf6ce51c06bfeaf33ed95',
|
||||
'info_dict': {
|
||||
'id': '1609180445',
|
||||
'ext': 'mp4',
|
||||
'title': 'Более зеленый Таллинн',
|
||||
'alt_title': '',
|
||||
'description': 'md5:fd34d9bf939c28c4a725b19a7f0d6320',
|
||||
'release_date': '20231224',
|
||||
'upload_date': '20231130',
|
||||
'modified_date': '20231207',
|
||||
'release_timestamp': 1703423400,
|
||||
'timestamp': 1701338400,
|
||||
'modified_timestamp': 1701967200,
|
||||
'release_year': 2023,
|
||||
},
|
||||
}, {
|
||||
'note': 'Jupiter+: Series: The Sniffer',
|
||||
'url': 'https://jupiterpluss.err.ee/1608311387/njuhach',
|
||||
'md5': '2abdeb7131ce551bce49e8d0cea08536',
|
||||
'info_dict': {
|
||||
'id': '1608311387',
|
||||
'ext': 'mp4',
|
||||
'title': 'Нюхач',
|
||||
'alt_title': '',
|
||||
'description': 'md5:8c5c7d8f32ec6e54cd498c9e59ca83bc',
|
||||
'release_date': '20230601',
|
||||
'upload_date': '20210818',
|
||||
'modified_date': '20210903',
|
||||
'release_timestamp': 1685633400,
|
||||
'timestamp': 1629318000,
|
||||
'modified_timestamp': 1630686000,
|
||||
'release_year': 2013,
|
||||
'episode': 'Episode 1',
|
||||
'episode_id': '1608311390',
|
||||
'episode_number': 1,
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'series': 'Нюхач',
|
||||
'series_id': '1608311387',
|
||||
},
|
||||
}, {
|
||||
'note': 'Jupiter+: Podcast: lesnye-istorii-aisty',
|
||||
'url': 'https://jupiterpluss.err.ee/1608990335/lesnye-istorii-aisty',
|
||||
'md5': '8b46d7e4510b254a14b7a52211b5bf96',
|
||||
'info_dict': {
|
||||
'id': '1608990335',
|
||||
'ext': 'm4a',
|
||||
'title': 'Лесные истории | Аисты',
|
||||
'alt_title': '',
|
||||
'description': 'md5:065e721623e271e7a63e6540d409ca6b',
|
||||
'release_date': '20230609',
|
||||
'upload_date': '20230527',
|
||||
'modified_date': '20230608',
|
||||
'release_timestamp': 1686308700,
|
||||
'timestamp': 1685145600,
|
||||
'modified_timestamp': 1686252600,
|
||||
'release_year': 2023,
|
||||
'episode': 'Episode 0',
|
||||
'episode_id': '1608990335',
|
||||
'episode_number': 0,
|
||||
'season': 'Season 0',
|
||||
'season_number': 0,
|
||||
'series': 'Лесные истории | Аисты',
|
||||
'series_id': '1037497',
|
||||
}
|
||||
}, {
|
||||
'note': 'Lasteekraan: Pätu',
|
||||
'url': 'https://lasteekraan.err.ee/1092243/patu',
|
||||
'md5': 'a67eb9b9bcb3d201718c15d1638edf77',
|
||||
'info_dict': {
|
||||
'id': '1092243',
|
||||
'ext': 'mp4',
|
||||
'title': 'Pätu',
|
||||
'alt_title': '',
|
||||
'description': 'md5:64a7b5a80afd7042d3f8ec48c77befd9',
|
||||
'release_date': '20230614',
|
||||
'upload_date': '20200520',
|
||||
'modified_date': '20200520',
|
||||
'release_timestamp': 1686745800,
|
||||
'timestamp': 1589975640,
|
||||
'modified_timestamp': 1589975640,
|
||||
'release_year': 1990,
|
||||
'episode': 'Episode 1',
|
||||
'episode_id': '1092243',
|
||||
'episode_number': 1,
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'series': 'Pätu',
|
||||
'series_id': '1092236',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
    # Resolve the content ID from the URL and fetch the page data that
    # describes the main content item (metadata + media sources).
    video_id = self._match_id(url)
    page_data = self._download_json(
        'https://services.err.ee/api/v2/vodContent/getContentPageData', video_id,
        query={'contentId': video_id})
    data = page_data['data']['mainContent']

    # Only the first dict found under "medias" is used.
    media_data = traverse_obj(data, ('medias', ..., {dict}), get_all=False)
    has_drm = traverse_obj(media_data, ('restrictions', 'drm', {bool}))
    if has_drm:
        self.report_drm(video_id)

    # Manifest sources, in extraction order: the "src" keys to look at and
    # the extractor call to run on each distinct URL found under them.
    manifest_sources = (
        (('hls', 'hls2', 'hlsNew'), lambda manifest_url: self._extract_m3u8_formats_and_subtitles(
            manifest_url, video_id, 'mp4', m3u8_id='hls', fatal=False)),
        (('dash', 'dashNew'), lambda manifest_url: self._extract_mpd_formats_and_subtitles(
            manifest_url, video_id, mpd_id='dash', fatal=False)),
    )

    formats, subtitles = [], {}
    for src_keys, extract_manifest in manifest_sources:
        # set() de-duplicates URLs that are repeated under several keys
        for manifest_url in set(traverse_obj(media_data, ('src', src_keys, {url_or_none}))):
            fmts, subs = extract_manifest(manifest_url)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

    # Direct (non-manifest) file URL, if any
    direct_url = traverse_obj(media_data, ('src', 'file', {url_or_none}))
    if direct_url:
        formats.append({
            'url': direct_url,
            'format_id': 'http',
        })

    info = {
        'id': video_id,
        'formats': formats,
        'subtitles': subtitles,
    }
    # General metadata; with get_all=False the first usable value of each
    # branching path (e.g. 'lead' before 'body') wins.
    info.update(traverse_obj(data, {
        'title': ('heading', {str}),
        'alt_title': ('subHeading', {str}),
        'description': (('lead', 'body'), {clean_html}, {lambda x: x or None}),
        'timestamp': ('created', {int_or_none}),
        'modified_timestamp': ('updated', {int_or_none}),
        'release_timestamp': (('scheduleStart', 'publicStart'), {int_or_none}),
        'release_year': ('year', {int_or_none}),
    }, get_all=False))
    # Series/episode fields are only reported for content typed as an episode
    if data.get('type') == 'episode':
        info.update(traverse_obj(data, {
            'series': ('heading', {str}),
            'series_id': ('rootContentId', {str_or_none}),
            'episode': ('subHeading', {str}),
            'season_number': ('season', {int_or_none}),
            'episode_number': ('episode', {int_or_none}),
            'episode_id': ('id', {str_or_none}),
        }))
    return info
|
||||
@@ -20,6 +20,7 @@ from ..utils import (
|
||||
get_element_by_id,
|
||||
get_first,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
merge_dicts,
|
||||
parse_count,
|
||||
@@ -43,6 +44,7 @@ class FacebookIE(InfoExtractor):
|
||||
(?:[^#]*?\#!/)?
|
||||
(?:
|
||||
(?:
|
||||
permalink\.php|
|
||||
video/video\.php|
|
||||
photo\.php|
|
||||
video\.php|
|
||||
@@ -52,6 +54,7 @@ class FacebookIE(InfoExtractor):
|
||||
)\?(?:.*?)(?:v|video_id|story_fbid)=|
|
||||
[^/]+/videos/(?:[^/]+/)?|
|
||||
[^/]+/posts/|
|
||||
events/(?:[^/]+/)?|
|
||||
groups/[^/]+/(?:permalink|posts)/|
|
||||
watchparty/
|
||||
)|
|
||||
@@ -248,6 +251,7 @@ class FacebookIE(InfoExtractor):
|
||||
'duration': 148.435,
|
||||
},
|
||||
}, {
|
||||
# data.node.comet_sections.content.story.attachments[].styles.attachment.media
|
||||
'url': 'https://www.facebook.com/attn/posts/pfbid0j1Czf2gGDVqeQ8KiMLFm3pWN8GxsQmeRrVhimWDzMuKQoR8r4b1knNsejELmUgyhl',
|
||||
'info_dict': {
|
||||
'id': '6968553779868435',
|
||||
@@ -262,6 +266,22 @@ class FacebookIE(InfoExtractor):
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'timestamp': 1701975646,
|
||||
},
|
||||
}, {
|
||||
# data.node.comet_sections.content.story.attachments[].styles.attachment.media
|
||||
'url': 'https://www.facebook.com/permalink.php?story_fbid=pfbid0fqQuVEQyXRa9Dp4RcaTR14KHU3uULHV1EK7eckNXSH63JMuoALsAvVCJ97zAGitil&id=100068861234290',
|
||||
'info_dict': {
|
||||
'id': '270103405756416',
|
||||
'ext': 'mp4',
|
||||
'title': 'Lela Evans',
|
||||
'description': 'Today Makkovik\'s own Pilot Mandy Smith made her inaugural landing on the airstrip in her hometown. What a proud moment as we all cheered and...',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'uploader': 'Lela Evans',
|
||||
'uploader_id': 'pfbid0shZJipuigyy5mqrUJn9ub5LJFWNHvan5prtyi3LrDuuuJ4NwrURgnQHYR9fywBepl',
|
||||
'upload_date': '20231228',
|
||||
'timestamp': 1703804085,
|
||||
'duration': 394.347,
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/story.php?story_fbid=pfbid0Fnzhm8UuzjBYpPMNFzaSpFE9UmLdU4fJN8qTANi1Dmtj5q7DNrL5NERXfsAzDEV7l&id=100073071055552',
|
||||
'only_matching': True,
|
||||
@@ -380,6 +400,18 @@ class FacebookIE(InfoExtractor):
|
||||
},
|
||||
'playlist_count': 1,
|
||||
'skip': 'Requires logging in',
|
||||
}, {
|
||||
# data.event.cover_media_renderer.cover_video
|
||||
'url': 'https://m.facebook.com/events/1509582499515440',
|
||||
'info_dict': {
|
||||
'id': '637246984455045',
|
||||
'ext': 'mp4',
|
||||
'title': 'ANALISI IN CAMPO OSCURO " Coaguli nel sangue dei vaccinati"',
|
||||
'description': 'Other event by Comitato Liberi Pensatori on Tuesday, October 18 2022',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'uploader': 'Comitato Liberi Pensatori',
|
||||
'uploader_id': '100065709540881',
|
||||
},
|
||||
}]
|
||||
_SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)'
|
||||
_api_config = {
|
||||
@@ -454,38 +486,10 @@ class FacebookIE(InfoExtractor):
|
||||
r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage)]
|
||||
post = traverse_obj(post_data, (
|
||||
..., 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
|
||||
|
||||
automatic_captions, subtitles = {}, {}
|
||||
subs_data = traverse_obj(post, (..., 'video', ..., 'attachments', ..., lambda k, v: (
|
||||
k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')))
|
||||
is_video_broadcast = get_first(subs_data, 'is_video_broadcast', expected_type=bool)
|
||||
captions = get_first(subs_data, 'video_available_captions_locales', 'captions_url')
|
||||
if url_or_none(captions): # if subs_data only had a 'captions_url'
|
||||
locale = self._html_search_meta(['og:locale', 'twitter:locale'], webpage, 'locale', default='en_US')
|
||||
subtitles[locale] = [{'url': captions}]
|
||||
# or else subs_data had 'video_available_captions_locales', a list of dicts
|
||||
for caption in traverse_obj(captions, (
|
||||
{lambda x: sorted(x, key=lambda c: c['locale'])}, lambda _, v: v['captions_url'])
|
||||
):
|
||||
lang = caption.get('localized_language') or ''
|
||||
subs = {
|
||||
'url': caption['captions_url'],
|
||||
'name': format_field(caption, 'localized_country', f'{lang} (%s)', default=lang),
|
||||
}
|
||||
if caption.get('localized_creation_method') or is_video_broadcast:
|
||||
automatic_captions.setdefault(caption['locale'], []).append(subs)
|
||||
else:
|
||||
subtitles.setdefault(caption['locale'], []).append(subs)
|
||||
|
||||
media = traverse_obj(post, (..., 'attachments', ..., lambda k, v: (
|
||||
k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict)
|
||||
title = get_first(media, ('title', 'text'))
|
||||
description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text'))
|
||||
uploader_data = (
|
||||
get_first(media, ('owner', {dict}))
|
||||
or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
|
||||
or get_first(post, ('node', 'actors', ..., {dict})) or {})
|
||||
|
||||
page_title = title or self._html_search_regex((
|
||||
r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>(?P<content>[^<]*)</h2>',
|
||||
r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(?P<content>.*?)</span>',
|
||||
@@ -494,11 +498,16 @@ class FacebookIE(InfoExtractor):
|
||||
description = description or self._html_search_meta(
|
||||
['description', 'og:description', 'twitter:description'],
|
||||
webpage, 'description', default=None)
|
||||
uploader_data = (
|
||||
get_first(media, ('owner', {dict}))
|
||||
or get_first(post, ('video', 'creation_story', 'attachments', ..., 'media', lambda k, v: k == 'owner' and v['name']))
|
||||
or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
|
||||
or get_first(post, ('node', 'actors', ..., {dict}))
|
||||
or get_first(post, ('event', 'event_creator', {dict})) or {})
|
||||
uploader = uploader_data.get('name') or (
|
||||
clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
|
||||
or self._search_regex(
|
||||
(r'ownerName\s*:\s*"([^"]+)"', *self._og_regexes('title')), webpage, 'uploader', fatal=False))
|
||||
|
||||
timestamp = int_or_none(self._search_regex(
|
||||
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
|
||||
'timestamp', default=None))
|
||||
@@ -520,8 +529,6 @@ class FacebookIE(InfoExtractor):
|
||||
webpage, 'view count', default=None)),
|
||||
'concurrent_view_count': get_first(post, (
|
||||
('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})),
|
||||
'automatic_captions': automatic_captions,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
info_json_ld = self._search_json_ld(webpage, video_id, default={})
|
||||
@@ -563,7 +570,11 @@ class FacebookIE(InfoExtractor):
|
||||
# Downloads with browser's User-Agent are rate limited. Working around
|
||||
# with non-browser User-Agent.
|
||||
for f in info['formats']:
|
||||
# Downloads with browser's User-Agent are rate limited. Working around
|
||||
# with non-browser User-Agent.
|
||||
f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'
|
||||
# Formats larger than ~500MB will return error 403 unless chunk size is regulated
|
||||
f.setdefault('downloader_options', {})['http_chunk_size'] = 250 << 20
|
||||
|
||||
def extract_relay_data(_filter):
|
||||
return self._parse_json(self._search_regex(
|
||||
@@ -573,8 +584,8 @@ class FacebookIE(InfoExtractor):
|
||||
def extract_relay_prefetched_data(_filter):
|
||||
return traverse_obj(extract_relay_data(_filter), (
|
||||
'require', (None, (..., ..., ..., '__bbox', 'require')),
|
||||
lambda _, v: 'RelayPrefetchedStreamCache' in v, ..., ...,
|
||||
'__bbox', 'result', 'data', {dict}), get_all=False) or {}
|
||||
lambda _, v: any(key.startswith('RelayPrefetchedStreamCache') for key in v),
|
||||
..., ..., '__bbox', 'result', 'data', {dict}), get_all=False) or {}
|
||||
|
||||
if not video_data:
|
||||
server_js_data = self._parse_json(self._search_regex([
|
||||
@@ -615,6 +626,29 @@ class FacebookIE(InfoExtractor):
|
||||
'url': playable_url,
|
||||
})
|
||||
extract_dash_manifest(video, formats)
|
||||
|
||||
automatic_captions, subtitles = {}, {}
|
||||
is_broadcast = traverse_obj(video, ('is_video_broadcast', {bool}))
|
||||
for caption in traverse_obj(video, (
|
||||
'video_available_captions_locales',
|
||||
{lambda x: sorted(x, key=lambda c: c['locale'])},
|
||||
lambda _, v: url_or_none(v['captions_url'])
|
||||
)):
|
||||
lang = caption.get('localized_language') or 'und'
|
||||
subs = {
|
||||
'url': caption['captions_url'],
|
||||
'name': format_field(caption, 'localized_country', f'{lang} (%s)', default=lang),
|
||||
}
|
||||
if caption.get('localized_creation_method') or is_broadcast:
|
||||
automatic_captions.setdefault(caption['locale'], []).append(subs)
|
||||
else:
|
||||
subtitles.setdefault(caption['locale'], []).append(subs)
|
||||
captions_url = traverse_obj(video, ('captions_url', {url_or_none}))
|
||||
if captions_url and not automatic_captions and not subtitles:
|
||||
locale = self._html_search_meta(
|
||||
['og:locale', 'twitter:locale'], webpage, 'locale', default='en_US')
|
||||
(automatic_captions if is_broadcast else subtitles)[locale] = [{'url': captions_url}]
|
||||
|
||||
info = {
|
||||
'id': v_id,
|
||||
'formats': formats,
|
||||
@@ -624,6 +658,8 @@ class FacebookIE(InfoExtractor):
|
||||
'timestamp': traverse_obj(video, 'publish_time', 'creation_time', expected_type=int_or_none),
|
||||
'duration': (float_or_none(video.get('playable_duration_in_ms'), 1000)
|
||||
or float_or_none(video.get('length_in_second'))),
|
||||
'automatic_captions': automatic_captions,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
process_formats(info)
|
||||
description = try_get(video, lambda x: x['savable_description']['text'])
|
||||
@@ -658,7 +694,8 @@ class FacebookIE(InfoExtractor):
|
||||
for edge in edges:
|
||||
parse_attachment(edge, key='node')
|
||||
|
||||
video = data.get('video') or {}
|
||||
video = traverse_obj(data, (
|
||||
'event', 'cover_media_renderer', 'cover_video'), 'video', expected_type=dict) or {}
|
||||
if video:
|
||||
attachments = try_get(video, [
|
||||
lambda x: x['story']['attachments'],
|
||||
@@ -677,6 +714,9 @@ class FacebookIE(InfoExtractor):
|
||||
# honor precise duration in video info
|
||||
if video_info.get('duration'):
|
||||
webpage_info['duration'] = video_info['duration']
|
||||
# preserve preferred_thumbnail in video info
|
||||
if video_info.get('thumbnail'):
|
||||
webpage_info['thumbnail'] = video_info['thumbnail']
|
||||
return merge_dicts(webpage_info, video_info)
|
||||
|
||||
if not video_data:
|
||||
@@ -907,3 +947,114 @@ class FacebookReelIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
f'https://m.facebook.com/watch/?v={video_id}&_rdr', FacebookIE, video_id)
|
||||
|
||||
|
||||
class FacebookAdsIE(InfoExtractor):
    # Extracts the video(s) attached to an entry of the Facebook Ad Library.
    # An ad with exactly one playable video/card is returned as a single
    # video; an ad with several is returned as a playlist.
    _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/ads/library/?\?(?:[^#]+&)?id=(?P<id>\d+)'
    IE_NAME = 'facebook:ads'

    _TESTS = [{
        'url': 'https://www.facebook.com/ads/library/?id=899206155126718',
        'info_dict': {
            'id': '899206155126718',
            'ext': 'mp4',
            'title': 'video by Kandao',
            'uploader': 'Kandao',
            'uploader_id': '774114102743284',
            'uploader_url': r're:^https?://.*',
            'timestamp': 1702548330,
            'thumbnail': r're:^https?://.*',
            'upload_date': '20231214',
            'like_count': int,
        }
    }, {
        'url': 'https://www.facebook.com/ads/library/?id=893637265423481',
        'info_dict': {
            'id': '893637265423481',
            'title': 'Jusqu\u2019\u00e0 -25% sur une s\u00e9lection de vins p\u00e9tillants italiens ',
            'uploader': 'Eataly Paris Marais',
            'uploader_id': '2086668958314152',
            'uploader_url': r're:^https?://.*',
            'timestamp': 1703571529,
            'upload_date': '20231226',
            'like_count': int,
        },
        'playlist_count': 3,
    }, {
        'url': 'https://es-la.facebook.com/ads/library/?id=901230958115569',
        'only_matching': True,
    }, {
        'url': 'https://m.facebook.com/ads/library/?id=901230958115569',
        'only_matching': True,
    }]

    # Maps the URL-bearing keys of a video/card dict to (format_id, format_note).
    # The key order is also used as the quality ranking in _extract_formats.
    _FORMATS_MAP = {
        'watermarked_video_sd_url': ('sd-wmk', 'SD, watermarked'),
        'video_sd_url': ('sd', None),
        'watermarked_video_hd_url': ('hd-wmk', 'HD, watermarked'),
        'video_hd_url': ('hd', None),
    }

    def _extract_formats(self, video_dict):
        # Build one format per key of video_dict that is listed in
        # _FORMATS_MAP and holds a valid URL.
        # The ranking helper depends only on _FORMATS_MAP, so create it once
        # instead of once per format.
        quality = qualities(tuple(self._FORMATS_MAP))

        formats = []
        for format_key, format_url in traverse_obj(video_dict, (
            {dict.items}, lambda _, v: v[0] in self._FORMATS_MAP and url_or_none(v[1])
        )):
            format_id, format_note = self._FORMATS_MAP[format_key]
            formats.append({
                'format_id': format_id,
                'format_note': format_note,
                'url': format_url,
                'ext': 'mp4',
                'quality': quality(format_key),
            })
        return formats

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The ad data is embedded in the page as JSON passed to s.handle(...)
        post_data = [self._parse_json(j, video_id, fatal=False)
                     for j in re.findall(r's\.handle\(({.*})\);requireLazy\(', webpage)]
        data = traverse_obj(post_data, (
            ..., 'require', ..., ..., ..., 'props', 'deeplinkAdCard', 'snapshot', {dict}), get_all=False)
        if not data:
            raise ExtractorError('Unable to extract ad data')

        # '{{product.name}}' is an unfilled template placeholder, not a title;
        # fall back to "<display_format> by <page_name>" in that case.
        title = data.get('title')
        if not title or title == '{{product.name}}':
            title = join_nonempty('display_format', 'page_name', delim=' by ', from_dict=data)

        info_dict = traverse_obj(data, {
            'description': ('link_description', {str}, {lambda x: x if x != '{{product.description}}' else None}),
            'uploader': ('page_name', {str}),
            'uploader_id': ('page_id', {str_or_none}),
            'uploader_url': ('page_profile_uri', {url_or_none}),
            'timestamp': ('creation_time', {int_or_none}),
            'like_count': ('page_like_count', {int_or_none}),
        })

        # Keep every video/card that has at least one usable format URL.
        # Use .get() here: an exception raised inside a traverse_obj filter
        # counts as "no match", so indexing with v[f] would silently drop an
        # otherwise playable card as soon as a single format key is absent.
        entries = []
        for idx, entry in enumerate(traverse_obj(
            data, (('videos', 'cards'), lambda _, v: any(url_or_none(v.get(f)) for f in self._FORMATS_MAP))), 1
        ):
            entries.append({
                'id': f'{video_id}_{idx}',
                'title': entry.get('title') or title,
                'description': entry.get('link_description') or info_dict.get('description'),
                'thumbnail': url_or_none(entry.get('video_preview_image_url')),
                'formats': self._extract_formats(entry),
            })

        if len(entries) == 1:
            # Single video: merge the entry into the ad-level metadata
            info_dict.update(entries[0])

        elif len(entries) > 1:
            info_dict.update({
                'title': entries[0]['title'],
                'entries': entries,
                '_type': 'playlist',
            })

        # The top-level result always carries the ad ID (this overrides the
        # "<id>_1" entry ID merged in for the single-video case).
        info_dict['id'] = video_id

        return info_dict
|
||||
|
||||
62
yt_dlp/extractor/flextv.py
Normal file
62
yt_dlp/extractor/flextv.py
Normal file
@@ -0,0 +1,62 @@
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
UserNotLive,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class FlexTVIE(InfoExtractor):
    # Live-stream extractor for flextv.co.kr channel pages
    _VALID_URL = r'https?://(?:www\.)?flextv\.co\.kr/channels/(?P<id>\d+)/live'
    _TESTS = [{
        'url': 'https://www.flextv.co.kr/channels/231638/live',
        'info_dict': {
            'id': '231638',
            'ext': 'mp4',
            'title': r're:^214하나만\.\.\. ',
            'thumbnail': r're:^https?://.+\.jpg',
            'upload_date': r're:\d{8}',
            'timestamp': int,
            'live_status': 'is_live',
            'channel': 'Hi별',
            'channel_id': '244396',
        },
        'skip': 'The channel is offline',
    }, {
        'url': 'https://www.flextv.co.kr/channels/746/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        channel_id = self._match_id(url)
        stream_api_url = f'https://api.flextv.co.kr/api/channels/{channel_id}/stream'

        try:
            stream_data = self._download_json(
                stream_api_url, channel_id, query={'option': 'all'})
        except ExtractorError as e:
            # HTTP 400 from the stream endpoint is reported as "not live";
            # every other failure is re-raised unchanged.
            channel_offline = isinstance(e.cause, HTTPError) and e.cause.status == 400
            if not channel_offline:
                raise
            raise UserNotLive(video_id=channel_id)

        # The first listed source is the HLS playlist to extract
        first_source = stream_data['sources'][0]
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            first_source['url'], channel_id, 'mp4')

        metadata = traverse_obj(stream_data, {
            'title': ('stream', 'title', {str}),
            'timestamp': ('stream', 'createdAt', {parse_iso8601}),
            'thumbnail': ('thumbUrl', {url_or_none}),
            'channel': ('owner', 'name', {str}),
            'channel_id': ('owner', 'id', {str_or_none}),
        })

        return {
            'id': channel_id,
            'formats': formats,
            'subtitles': subtitles,
            'is_live': True,
            **metadata,
        }
|
||||
@@ -11,6 +11,7 @@ from ..utils import (
|
||||
join_nonempty,
|
||||
parse_codecs,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
@@ -108,6 +109,64 @@ class FloatplaneIE(InfoExtractor):
|
||||
'availability': 'subscriber_only',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.floatplane.com/post/65B5PNoBtf',
|
||||
'info_dict': {
|
||||
'id': '65B5PNoBtf',
|
||||
'description': 'I recorded the inbuilt demo mode for your 90\'s enjoyment, thanks for being Floaties!',
|
||||
'display_id': '65B5PNoBtf',
|
||||
'like_count': int,
|
||||
'release_timestamp': 1701249480,
|
||||
'uploader': 'The Trash Network',
|
||||
'availability': 'subscriber_only',
|
||||
'uploader_id': '61bc20c9a131fb692bf2a513',
|
||||
'uploader_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home',
|
||||
'channel_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home/thedrumthing',
|
||||
'comment_count': int,
|
||||
'title': 'The $50 electronic drum kit.',
|
||||
'channel_id': '64424fe73cd58cbcf8d8e131',
|
||||
'thumbnail': 'https://pbs.floatplane.com/blogPost_thumbnails/65B5PNoBtf/725555379422705_1701247052743.jpeg',
|
||||
'dislike_count': int,
|
||||
'channel': 'The Drum Thing',
|
||||
'release_date': '20231129',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'ISPJjexylS',
|
||||
'ext': 'mp4',
|
||||
'release_date': '20231129',
|
||||
'release_timestamp': 1701249480,
|
||||
'title': 'The $50 electronic drum kit. .mov',
|
||||
'channel_id': '64424fe73cd58cbcf8d8e131',
|
||||
'thumbnail': 'https://pbs.floatplane.com/video_thumbnails/ISPJjexylS/335202812134041_1701249383392.jpeg',
|
||||
'availability': 'subscriber_only',
|
||||
'uploader': 'The Trash Network',
|
||||
'duration': 622,
|
||||
'channel': 'The Drum Thing',
|
||||
'uploader_id': '61bc20c9a131fb692bf2a513',
|
||||
'channel_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home/thedrumthing',
|
||||
'uploader_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': 'qKfxu6fEpu',
|
||||
'ext': 'aac',
|
||||
'release_date': '20231129',
|
||||
'release_timestamp': 1701249480,
|
||||
'title': 'Roland TD-7 Demo.m4a',
|
||||
'channel_id': '64424fe73cd58cbcf8d8e131',
|
||||
'availability': 'subscriber_only',
|
||||
'uploader': 'The Trash Network',
|
||||
'duration': 114,
|
||||
'channel': 'The Drum Thing',
|
||||
'uploader_id': '61bc20c9a131fb692bf2a513',
|
||||
'channel_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home/thedrumthing',
|
||||
'uploader_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home',
|
||||
},
|
||||
}],
|
||||
'skip': 'requires subscription: "The Trash Network"',
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
@@ -124,6 +183,22 @@ class FloatplaneIE(InfoExtractor):
|
||||
if not any(traverse_obj(post_data, ('metadata', ('hasVideo', 'hasAudio')))):
|
||||
raise ExtractorError('Post does not contain a video or audio track', expected=True)
|
||||
|
||||
uploader_url = format_field(
|
||||
post_data, [('creator', 'urlname')], 'https://www.floatplane.com/channel/%s/home') or None
|
||||
|
||||
common_info = {
|
||||
'uploader_url': uploader_url,
|
||||
'channel_url': urljoin(f'{uploader_url}/', traverse_obj(post_data, ('channel', 'urlname'))),
|
||||
'availability': self._availability(needs_subscription=True),
|
||||
**traverse_obj(post_data, {
|
||||
'uploader': ('creator', 'title', {str}),
|
||||
'uploader_id': ('creator', 'id', {str}),
|
||||
'channel': ('channel', 'title', {str}),
|
||||
'channel_id': ('channel', 'id', {str}),
|
||||
'release_timestamp': ('releaseDate', {parse_iso8601}),
|
||||
}),
|
||||
}
|
||||
|
||||
items = []
|
||||
for media in traverse_obj(post_data, (('videoAttachments', 'audioAttachments'), ...)):
|
||||
media_id = media['id']
|
||||
@@ -150,11 +225,11 @@ class FloatplaneIE(InfoExtractor):
|
||||
formats = []
|
||||
for quality in traverse_obj(stream, ('resource', 'data', 'qualityLevels', ...)):
|
||||
url = urljoin(stream['cdn'], format_path(traverse_obj(
|
||||
stream, ('resource', 'data', 'qualityLevelParams', quality['name']))))
|
||||
stream, ('resource', 'data', 'qualityLevelParams', quality['name'], {dict}))))
|
||||
formats.append({
|
||||
**traverse_obj(quality, {
|
||||
'format_id': 'name',
|
||||
'format_note': 'label',
|
||||
'format_id': ('name', {str}),
|
||||
'format_note': ('label', {str}),
|
||||
'width': ('width', {int}),
|
||||
'height': ('height', {int}),
|
||||
}),
|
||||
@@ -164,38 +239,28 @@ class FloatplaneIE(InfoExtractor):
|
||||
})
|
||||
|
||||
items.append({
|
||||
**common_info,
|
||||
'id': media_id,
|
||||
**traverse_obj(metadata, {
|
||||
'title': 'title',
|
||||
'title': ('title', {str}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'thumbnail': ('thumbnail', 'path'),
|
||||
'thumbnail': ('thumbnail', 'path', {url_or_none}),
|
||||
}),
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
uploader_url = format_field(
|
||||
post_data, [('creator', 'urlname')], 'https://www.floatplane.com/channel/%s/home') or None
|
||||
channel_url = urljoin(f'{uploader_url}/', traverse_obj(post_data, ('channel', 'urlname')))
|
||||
|
||||
post_info = {
|
||||
**common_info,
|
||||
'id': post_id,
|
||||
'display_id': post_id,
|
||||
**traverse_obj(post_data, {
|
||||
'title': 'title',
|
||||
'title': ('title', {str}),
|
||||
'description': ('text', {clean_html}),
|
||||
'uploader': ('creator', 'title'),
|
||||
'uploader_id': ('creator', 'id'),
|
||||
'channel': ('channel', 'title'),
|
||||
'channel_id': ('channel', 'id'),
|
||||
'like_count': ('likes', {int_or_none}),
|
||||
'dislike_count': ('dislikes', {int_or_none}),
|
||||
'comment_count': ('comments', {int_or_none}),
|
||||
'release_timestamp': ('releaseDate', {parse_iso8601}),
|
||||
'thumbnail': ('thumbnail', 'path'),
|
||||
'thumbnail': ('thumbnail', 'path', {url_or_none}),
|
||||
}),
|
||||
'uploader_url': uploader_url,
|
||||
'channel_url': channel_url,
|
||||
'availability': self._availability(needs_subscription=True),
|
||||
}
|
||||
|
||||
if len(items) > 1:
|
||||
|
||||
@@ -1,25 +1,29 @@
|
||||
from .common import InfoExtractor
|
||||
from .nexx import NexxIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
)
|
||||
|
||||
|
||||
class FunkIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.|origin\.)?funk\.net/(?:channel|playlist)/[^/]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.funk.net/channel/ba-793/die-lustigsten-instrumente-aus-dem-internet-teil-2-1155821',
|
||||
'md5': '8dd9d9ab59b4aa4173b3197f2ea48e81',
|
||||
'md5': '8610449476156f338761a75391b0017d',
|
||||
'info_dict': {
|
||||
'id': '1155821',
|
||||
'ext': 'mp4',
|
||||
'title': 'Die LUSTIGSTEN INSTRUMENTE aus dem Internet - Teil 2',
|
||||
'description': 'md5:a691d0413ef4835588c5b03ded670c1f',
|
||||
'description': 'md5:2a03b67596eda0d1b5125c299f45e953',
|
||||
'timestamp': 1514507395,
|
||||
'upload_date': '20171229',
|
||||
'duration': 426.0,
|
||||
'cast': ['United Creators PMB GmbH'],
|
||||
'thumbnail': 'https://assets.nexx.cloud/media/75/56/79/3YKUSJN1LACN0CRxL.jpg',
|
||||
'display_id': 'die-lustigsten-instrumente-aus-dem-internet-teil-2',
|
||||
'alt_title': 'Die LUSTIGSTEN INSTRUMENTE aus dem Internet Teil 2',
|
||||
'season_number': 0,
|
||||
'season': 'Season 0',
|
||||
'episode_number': 0,
|
||||
'episode': 'Episode 0',
|
||||
},
|
||||
|
||||
}, {
|
||||
'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699',
|
||||
'only_matching': True,
|
||||
@@ -27,18 +31,10 @@ class FunkIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id, nexx_id = self._match_valid_url(url).groups()
|
||||
video = self._download_json(
|
||||
'https://www.funk.net/api/v4.0/videos/' + nexx_id, nexx_id)
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'nexx:741:' + nexx_id,
|
||||
'url': f'nexx:741:{nexx_id}',
|
||||
'ie_key': NexxIE.ie_key(),
|
||||
'id': nexx_id,
|
||||
'title': video.get('title'),
|
||||
'description': video.get('description'),
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
'channel_id': str_or_none(video.get('channelId')),
|
||||
'display_id': display_id,
|
||||
'tags': video.get('tags'),
|
||||
'thumbnail': video.get('imageUrlLandscape'),
|
||||
}
|
||||
|
||||
@@ -66,7 +66,7 @@ class GofileIE(InfoExtractor):
|
||||
query_params = {
|
||||
'contentId': file_id,
|
||||
'token': self._TOKEN,
|
||||
'websiteToken': '7fd94ds12fds4', # From https://gofile.io/dist/js/alljs.js
|
||||
'wt': '4fd6sg89d7s6', # From https://gofile.io/dist/js/alljs.js
|
||||
}
|
||||
password = self.get_param('videopassword')
|
||||
if password:
|
||||
|
||||
@@ -40,6 +40,22 @@ class GoPlayIE(InfoExtractor):
|
||||
'title': 'A Family for the Holidays',
|
||||
},
|
||||
'skip': 'This video is only available for registered users'
|
||||
}, {
|
||||
'url': 'https://www.goplay.be/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay',
|
||||
'info_dict': {
|
||||
'id': '03eb8f2f-153e-41cb-9805-0d3a29dab656',
|
||||
'ext': 'mp4',
|
||||
'title': 'S11 - Aflevering 1',
|
||||
'episode': 'Episode 1',
|
||||
'series': 'De Mol',
|
||||
'season_number': 11,
|
||||
'episode_number': 1,
|
||||
'season': 'Season 11'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True
|
||||
},
|
||||
'skip': 'This video is only available for registered users'
|
||||
}]
|
||||
|
||||
_id_token = None
|
||||
@@ -77,16 +93,39 @@ class GoPlayIE(InfoExtractor):
|
||||
|
||||
api = self._download_json(
|
||||
f'https://api.goplay.be/web/v1/videos/long-form/{video_id}',
|
||||
video_id, headers={'Authorization': 'Bearer %s' % self._id_token})
|
||||
video_id, headers={
|
||||
'Authorization': 'Bearer %s' % self._id_token,
|
||||
**self.geo_verification_headers(),
|
||||
})
|
||||
|
||||
formats, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
api['manifestUrls']['hls'], video_id, ext='mp4', m3u8_id='HLS')
|
||||
if 'manifestUrls' in api:
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
api['manifestUrls']['hls'], video_id, ext='mp4', m3u8_id='HLS')
|
||||
|
||||
else:
|
||||
if 'ssai' not in api:
|
||||
raise ExtractorError('expecting Google SSAI stream')
|
||||
|
||||
ssai_content_source_id = api['ssai']['contentSourceID']
|
||||
ssai_video_id = api['ssai']['videoID']
|
||||
|
||||
dai = self._download_json(
|
||||
f'https://dai.google.com/ondemand/dash/content/{ssai_content_source_id}/vid/{ssai_video_id}/streams',
|
||||
video_id, data=b'{"api-key":"null"}',
|
||||
headers={'content-type': 'application/json'})
|
||||
|
||||
periods = self._extract_mpd_periods(dai['stream_manifest'], video_id)
|
||||
|
||||
# skip pre-roll and mid-roll ads
|
||||
periods = [p for p in periods if '-ad-' not in p['id']]
|
||||
|
||||
formats, subtitles = self._merge_mpd_periods(periods)
|
||||
|
||||
info_dict.update({
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
|
||||
return info_dict
|
||||
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ class LeFigaroVideoEmbedIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://video.lefigaro.fr/embed/figaro/video/les-francais-ne-veulent-ils-plus-travailler-suivez-en-direct-le-club-le-figaro-idees/',
|
||||
'md5': 'e94de44cd80818084352fcf8de1ce82c',
|
||||
'md5': 'a0c3069b7e4c4526abf0053a7713f56f',
|
||||
'info_dict': {
|
||||
'id': 'g9j7Eovo',
|
||||
'title': 'Les Français ne veulent-ils plus travailler ? Retrouvez Le Club Le Figaro Idées',
|
||||
@@ -26,7 +26,7 @@ class LeFigaroVideoEmbedIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
'url': 'https://video.lefigaro.fr/embed/figaro/video/intelligence-artificielle-faut-il-sen-mefier/',
|
||||
'md5': '0b3f10332b812034b3a3eda1ef877c5f',
|
||||
'md5': '319c662943dd777bab835cae1e2d73a5',
|
||||
'info_dict': {
|
||||
'id': 'LeAgybyc',
|
||||
'title': 'Intelligence artificielle : faut-il s’en méfier ?',
|
||||
@@ -41,7 +41,7 @@ class LeFigaroVideoEmbedIE(InfoExtractor):
|
||||
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://video.lefigaro.fr/figaro/video/suivez-en-direct-le-club-le-figaro-international-avec-philippe-gelie-9/',
|
||||
'md5': '3972ddf2d5f8b98699f191687258e2f9',
|
||||
'md5': '6289f9489efb969e38245f31721596fe',
|
||||
'info_dict': {
|
||||
'id': 'QChnbPYA',
|
||||
'title': 'Où en est le couple franco-allemand ? Retrouvez Le Club Le Figaro International',
|
||||
@@ -55,7 +55,7 @@ class LeFigaroVideoEmbedIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
'url': 'https://video.lefigaro.fr/figaro/video/la-philosophe-nathalie-sarthou-lajus-est-linvitee-du-figaro-live/',
|
||||
'md5': '3ac0a0769546ee6be41ab52caea5d9a9',
|
||||
'md5': 'f6df814cae53e85937621599d2967520',
|
||||
'info_dict': {
|
||||
'id': 'QJzqoNbf',
|
||||
'title': 'La philosophe Nathalie Sarthou-Lajus est l’invitée du Figaro Live',
|
||||
@@ -73,7 +73,8 @@ class LeFigaroVideoEmbedIE(InfoExtractor):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
player_data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['pageData']['playerData']
|
||||
player_data = self._search_nextjs_data(
|
||||
webpage, display_id)['props']['pageProps']['initialProps']['pageData']['playerData']
|
||||
|
||||
return self.url_result(
|
||||
f'jwplatform:{player_data["videoId"]}', title=player_data.get('title'),
|
||||
|
||||
@@ -3,16 +3,15 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
srt_subtitles_timecode,
|
||||
strip_or_none,
|
||||
mimetype2ext,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
@@ -83,15 +82,29 @@ class LinkedInLearningBaseIE(LinkedInBaseIE):
|
||||
|
||||
|
||||
class LinkedInIE(LinkedInBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?linkedin\.com/posts/.+?(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?linkedin\.com/posts/[^/?#]+-(?P<id>\d+)-\w{4}/?(?:[?#]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.linkedin.com/posts/mishalkhawaja_sendinblueviews-toronto-digitalmarketing-ugcPost-6850898786781339649-mM20',
|
||||
'info_dict': {
|
||||
'id': '6850898786781339649',
|
||||
'ext': 'mp4',
|
||||
'title': 'Mishal K. on LinkedIn: #sendinblueviews #toronto #digitalmarketing',
|
||||
'description': 'md5:be125430bab1c574f16aeb186a4d5b19',
|
||||
'creator': 'Mishal K.'
|
||||
'title': 'Mishal K. on LinkedIn: #sendinblueviews #toronto #digitalmarketing #nowhiring #sendinblue…',
|
||||
'description': 'md5:2998a31f6f479376dd62831f53a80f71',
|
||||
'uploader': 'Mishal K.',
|
||||
'thumbnail': 're:^https?://media.licdn.com/dms/image/.*$',
|
||||
'like_count': int
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.linkedin.com/posts/the-mathworks_2_what-is-mathworks-cloud-center-activity-7151241570371948544-4Gu7',
|
||||
'info_dict': {
|
||||
'id': '7151241570371948544',
|
||||
'ext': 'mp4',
|
||||
'title': 'MathWorks on LinkedIn: What Is MathWorks Cloud Center?',
|
||||
'description': 'md5:95f9d4eeb6337882fb47eefe13d7a40c',
|
||||
'uploader': 'MathWorks',
|
||||
'thumbnail': 're:^https?://media.licdn.com/dms/image/.*$',
|
||||
'like_count': int,
|
||||
'subtitles': 'mincount:1'
|
||||
},
|
||||
}]
|
||||
|
||||
@@ -99,26 +112,30 @@ class LinkedInIE(LinkedInBaseIE):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_extract_title(webpage)
|
||||
description = clean_html(get_element_by_class('share-update-card__update-text', webpage))
|
||||
like_count = int_or_none(get_element_by_class('social-counts-reactions__social-counts-numRections', webpage))
|
||||
creator = strip_or_none(clean_html(get_element_by_class('comment__actor-name', webpage)))
|
||||
|
||||
sources = self._parse_json(extract_attributes(self._search_regex(r'(<video[^>]+>)', webpage, 'video'))['data-sources'], video_id)
|
||||
video_attrs = extract_attributes(self._search_regex(r'(<video[^>]+>)', webpage, 'video'))
|
||||
sources = self._parse_json(video_attrs['data-sources'], video_id)
|
||||
formats = [{
|
||||
'url': source['src'],
|
||||
'ext': mimetype2ext(source.get('type')),
|
||||
'tbr': float_or_none(source.get('data-bitrate'), scale=1000),
|
||||
} for source in sources]
|
||||
subtitles = {'en': [{
|
||||
'url': video_attrs['data-captions-url'],
|
||||
'ext': 'vtt',
|
||||
}]} if url_or_none(video_attrs.get('data-captions-url')) else {}
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'like_count': like_count,
|
||||
'creator': creator,
|
||||
'title': self._og_search_title(webpage, default=None) or self._html_extract_title(webpage),
|
||||
'like_count': int_or_none(self._search_regex(
|
||||
r'\bdata-num-reactions="(\d+)"', webpage, 'reactions', default=None)),
|
||||
'uploader': traverse_obj(
|
||||
self._yield_json_ld(webpage, video_id),
|
||||
(lambda _, v: v['@type'] == 'SocialMediaPosting', 'author', 'name', {str}), get_all=False),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'description': description,
|
||||
'description': self._og_search_description(webpage, default=None),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
|
||||
282
yt_dlp/extractor/lsm.py
Normal file
282
yt_dlp/extractor/lsm.py
Normal file
@@ -0,0 +1,282 @@
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
str_or_none,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class LSMLREmbedIE(InfoExtractor):
    # Extractor for the Latvijas Radio player/embed pages matched by _VALID_URL
    # (the *.lsm.lv radio subdomains and pieci.lv). A page may carry several
    # media items (audio and/or video); one item yields a single result,
    # anything else yields a playlist.
    _VALID_URL = r'''(?x)
        https?://(?:
            (?:latvijasradio|lr1|lr2|klasika|lr4|naba|radioteatris)\.lsm|
            pieci
        )\.lv/[^/?#]+/(?:
            pleijeris|embed
        )/?\?(?:[^#]+&)?(?:show|id)=(?P<id>\d+)'''
    _TESTS = [{
        'url': 'https://latvijasradio.lsm.lv/lv/embed/?theme=black&size=16x9&showCaptions=0&id=183522',
        'md5': '719b33875cd1429846eeeaeec6df2830',
        'info_dict': {
            'id': 'a342781',
            'ext': 'mp3',
            'duration': 1823,
            'title': '#138 Nepilnīgā kompensējamo zāļu sistēma pat mēnešiem dzenā pacientus pa aptiekām',
            'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/9/d/gallery_fd4675ac.jpg',
        }
    }, {
        'url': 'https://radioteatris.lsm.lv/lv/embed/?id=&show=1270&theme=white&size=16x9',
        'info_dict': {
            'id': '1270',
        },
        'playlist_count': 3,
        'playlist': [{
            'md5': '2e61b6eceff00d14d57fdbbe6ab24cac',
            'info_dict': {
                'id': 'a297397',
                'ext': 'mp3',
                'title': 'Eriks Emanuels Šmits "Pilāta evaņģēlijs". 1. daļa',
                'thumbnail': 'https://radioteatris.lsm.lv/public/assets/shows/62f131ae81e3c.jpg',
                'duration': 3300,
            },
        }],
    }, {
        'url': 'https://radioteatris.lsm.lv/lv/embed/?id=&show=1269&theme=white&size=16x9',
        'md5': '24810d4a961da2295d9860afdcaf4f5a',
        'info_dict': {
            'id': 'a230690',
            'ext': 'mp3',
            'title': 'Jens Ahlboms "Spārni". Radioizrāde ar Mārtiņa Freimaņa mūziku',
            'thumbnail': 'https://radioteatris.lsm.lv/public/assets/shows/62f13023a457c.jpg',
            'duration': 1788,
        }
    }, {
        'url': 'https://lr1.lsm.lv/lv/embed/?id=166557&show=0&theme=white&size=16x9',
        'info_dict': {
            'id': '166557',
        },
        'playlist_count': 2,
        'playlist': [{
            'md5': '6a8b0927572f443f09c6e50a3ad65f2d',
            'info_dict': {
                'id': 'a303104',
                'ext': 'mp3',
                'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/c/5/gallery_a83ad2c2.jpg',
                'title': 'Krustpunktā Lielā intervija: Valsts prezidents Egils Levits',
                'duration': 3222,
            },
        }, {
            'md5': '5d5e191e718b7644e5118b7b4e093a6d',
            'info_dict': {
                'id': 'v303104',
                'ext': 'mp4',
                'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/c/5/gallery_a83ad2c2.jpg',
                'title': 'Krustpunktā Lielā intervija: Valsts prezidents Egils Levits - Video Version',
                'duration': 3222,
            },
        }],
    }, {
        'url': 'https://lr1.lsm.lv/lv/embed/?id=183522&show=0&theme=white&size=16x9',
        'only_matching': True,
    }, {
        'url': 'https://lr2.lsm.lv/lv/embed/?id=182126&show=0&theme=white&size=16x9',
        'only_matching': True,
    }, {
        'url': 'https://klasika.lsm.lv/lv/embed/?id=110806&show=0&theme=white&size=16x9',
        'only_matching': True,
    }, {
        'url': 'https://lr4.lsm.lv/lv/embed/?id=184282&show=0&theme=white&size=16x9',
        'only_matching': True,
    }, {
        'url': 'https://pieci.lv/lv/embed/?id=168896&show=0&theme=white&size=16x9',
        'only_matching': True,
    }, {
        'url': 'https://naba.lsm.lv/lv/embed/?id=182901&show=0&theme=white&size=16x9',
        'only_matching': True,
    }, {
        'url': 'https://radioteatris.lsm.lv/lv/embed/?id=176439&show=0&theme=white&size=16x9',
        'only_matching': True,
    }, {
        'url': 'https://lr1.lsm.lv/lv/pleijeris/?embed=0&id=48205&time=00%3A00&idx=0',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a single entry or a playlist built from the page's player payload."""
        query = parse_qs(url)
        # Take the first of `show` / `id` that is a non-zero integer; empty
        # values (`id=`) and `show=0` are dropped by the `x or None` step.
        video_id = traverse_obj(query, (
            ('show', 'id'), 0, {int_or_none}, {lambda x: x or None}, {str_or_none}), get_all=False)
        webpage = self._download_webpage(url, video_id)

        # The player call carries two object literals: a short player config
        # (matched non-greedily) followed by the media description (greedy, up
        # to the closing `);`).
        player_data, media_data = self._search_regex(
            r'LR\.audio\.Player\s*\([^{]*(?P<player>\{.*?\}),(?P<media>\{.*\})\);',
            webpage, 'player json', group=('player', 'media'))

        # The player config only supplies the poster, so a parse failure there
        # is tolerated; the media description is required.
        player_json = self._parse_json(
            player_data, video_id, transform_source=js_to_json, fatal=False) or {}
        media_json = self._parse_json(media_data, video_id, transform_source=js_to_json)

        entries = []
        for item in traverse_obj(media_json, (('audio', 'video'), lambda _, v: v['id'])):
            formats = []
            for source_url in traverse_obj(item, ('sources', ..., 'file', {url_or_none})):
                if determine_ext(source_url) == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(source_url, video_id, fatal=False))
                else:
                    formats.append({'url': source_url})

            id_ = item['id']
            title = item.get('title')
            if id_.startswith('v') and not title:
                # An untitled 'v…' item borrows the title of the audio item
                # whose id matches after the first character.
                title = traverse_obj(
                    media_json, ('audio', lambda _, v: v['id'][1:] == id_[1:], 'title',
                                 {lambda x: x and f'{x} - Video Version'}), get_all=False)

            entries.append({
                'formats': formats,
                'thumbnail': urljoin(url, player_json.get('poster')),
                'id': id_,
                'title': title,
                'duration': traverse_obj(item, ('duration', {int_or_none})),
            })

        if len(entries) == 1:
            return entries[0]

        return self.playlist_result(entries, video_id)
|
||||
|
||||
|
||||
class LSMLTVEmbedIE(InfoExtractor):
    # Extractor for ltv.lsm.lv embed pages. The page payload names its source
    # ('telia' or 'youtube') and extraction is delegated to the matching
    # extractor.
    _VALID_URL = r'https?://ltv\.lsm\.lv/embed\?(?:[^#]+&)?c=(?P<id>[^#&]+)'
    _TESTS = [{
        'url': 'https://ltv.lsm.lv/embed?c=eyJpdiI6IjQzbHVUeHAyaDJiamFjcjdSUUFKdnc9PSIsInZhbHVlIjoiMHl3SnJNRmd2TmFIdnZwOGtGUUpzODFzUEZ4SVVsN2xoRjliSW9vckUyMWZIWG8vbWVzaFFkY0lhNmRjbjRpaCIsIm1hYyI6ImMzNjdhMzFhNTFhZmY1ZmE0NWI5YmFjZGI1YmJiNGEyNjgzNDM4MjUzMWEwM2FmMDMyZDMwYWM1MDFjZmM5MGIiLCJ0YWciOiIifQ==',
        'md5': '64f72a360ca530d5ed89c77646c9eee5',
        'info_dict': {
            'id': '46k_d23-6000-105',
            'ext': 'mp4',
            'timestamp': 1700589151,
            'duration': 1442,
            'upload_date': '20231121',
            'title': 'D23-6000-105_cetstud',
            'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/660858/placeholder1700589200.jpg',
        }
    }, {
        'url': 'https://ltv.lsm.lv/embed?enablesdkjs=1&c=eyJpdiI6IncwVzZmUFk2MU12enVWK1I3SUcwQ1E9PSIsInZhbHVlIjoid3FhV29vamc3T2sxL1RaRmJ5Rm1GTXozU0o2dVczdUtLK0cwZEZJMDQ2a3ZIRG5DK2pneGlnbktBQy9uazVleHN6VXhxdWIweWNvcHRDSnlISlNYOHlVZ1lpcTUrcWZSTUZPQW14TVdkMW9aOUtRWVNDcFF4eWpHNGcrT0VZbUNFQStKQk91cGpndW9FVjJIa0lpbkh3PT0iLCJtYWMiOiIyZGI1NDJlMWRlM2QyMGNhOGEwYTM2MmNlN2JlOGRhY2QyYjdkMmEzN2RlOTEzYTVkNzI1ODlhZDlhZjU4MjQ2IiwidGFnIjoiIn0=',
        'md5': 'a1711e190fe680fdb68fd8413b378e87',
        'info_dict': {
            'id': 'wUnFArIPDSY',
            'ext': 'mp4',
            'uploader': 'LTV_16plus',
            'release_date': '20220514',
            'channel_url': 'https://www.youtube.com/channel/UCNMrnafwXD2XKeeQOyfkFCw',
            'view_count': int,
            'availability': 'public',
            'thumbnail': 'https://i.ytimg.com/vi/wUnFArIPDSY/maxresdefault.jpg',
            'release_timestamp': 1652544074,
            'title': 'EIROVĪZIJA SALĀTOS',
            'live_status': 'was_live',
            'uploader_id': '@LTV16plus',
            'comment_count': int,
            'channel_id': 'UCNMrnafwXD2XKeeQOyfkFCw',
            'channel_follower_count': int,
            'categories': ['Entertainment'],
            'duration': 5269,
            'upload_date': '20220514',
            'age_limit': 0,
            'channel': 'LTV_16plus',
            'playable_in_embed': True,
            'tags': [],
            'uploader_url': 'https://www.youtube.com/@LTV16plus',
            'like_count': int,
            'description': 'md5:7ff0c42ba971e3c13e4b8a2ff03b70b5',
        }
    }]

    def _real_extract(self, url):
        """Resolve the embed payload and hand off to the CloudyCDN or Youtube extractor.

        Raises ExtractorError for an unknown source name or when the payload
        lacks the URL/id to delegate to.
        """
        video_id = urllib.parse.unquote(self._match_id(url))
        webpage = self._download_webpage(url, video_id)
        data = self._search_json(
            r'window\.ltvEmbedPayload\s*=', webpage, 'embed json', video_id)
        embed_type = traverse_obj(data, ('source', 'name', {str}))

        if embed_type == 'telia':
            ie_key = 'CloudyCDN'
            embed_url = traverse_obj(data, ('source', 'embed_url', {url_or_none}))
        elif embed_type == 'youtube':
            ie_key = 'Youtube'
            # For YouTube the payload's source id is passed on in place of a URL.
            embed_url = traverse_obj(data, ('source', 'id', {str}))
        else:
            raise ExtractorError(f'Unsupported embed type {embed_type!r}')

        # Fail here with a clear message rather than handing None to url_result.
        if not embed_url:
            raise ExtractorError(f'Unable to extract {embed_type} embed URL')

        return self.url_result(
            embed_url, ie_key, video_id, **traverse_obj(data, {
                'title': ('parentInfo', 'title'),
                'duration': ('parentInfo', 'duration', {int_or_none}),
                'thumbnail': ('source', 'poster', {url_or_none}),
            }))
|
||||
|
||||
|
||||
class LSMReplayIE(InfoExtractor):
    # Extractor for replay.lsm.lv item pages. Metadata is read from the page's
    # Nuxt payload and playback is delegated (url_transparent) to whatever URL
    # the payload lists under playback/service.
    _VALID_URL = r'https?://replay\.lsm\.lv/[^/?#]+/(?:ieraksts|statja)/[^/?#]+/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://replay.lsm.lv/lv/ieraksts/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija',
        'md5': '64f72a360ca530d5ed89c77646c9eee5',
        'info_dict': {
            'id': '46k_d23-6000-105',
            'ext': 'mp4',
            'timestamp': 1700586300,
            'description': 'md5:0f1b14798cc39e1ae578bd0eb268f759',
            'duration': 1442,
            'upload_date': '20231121',
            'title': '4. studija. Zolitūdes traģēdija un Inčupes stacija',
            'thumbnail': 'https://ltv.lsm.lv/storage/media/8/7/large/5/1f9604e1.jpg',
        }
    }, {
        'url': 'https://replay.lsm.lv/lv/ieraksts/lr/183522/138-nepilniga-kompensejamo-zalu-sistema-pat-menesiem-dzena-pacientus-pa-aptiekam',
        'md5': '719b33875cd1429846eeeaeec6df2830',
        'info_dict': {
            'id': 'a342781',
            'ext': 'mp3',
            'duration': 1823,
            'title': '#138 Nepilnīgā kompensējamo zāļu sistēma pat mēnešiem dzenā pacientus pa aptiekām',
            'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/9/d/large_fd4675ac.jpg',
            'upload_date': '20231102',
            'timestamp': 1698921060,
            'description': 'md5:7bac3b2dd41e44325032943251c357b1',
        }
    }, {
        'url': 'https://replay.lsm.lv/ru/statja/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija',
        'only_matching': True,
    }]

    def _fix_nuxt_data(self, webpage):
        """Replace `Object.create(null[,{...}])` calls with their object literal, or `null`."""
        return re.sub(r'Object\.create\(null(?:,(\{.+\}))?\)', lambda m: m.group(1) or 'null', webpage)

    def _real_extract(self, url):
        """Build a url_transparent result from the page's `__REPLAY__` Nuxt data."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The page is rewritten first so that `Object.create(null, ...)` calls
        # do not reach the Nuxt-data parser.
        data = self._search_nuxt_data(
            self._fix_nuxt_data(webpage), video_id, context_name='__REPLAY__')

        return {
            '_type': 'url_transparent',
            'id': video_id,
            **traverse_obj(data, {
                'url': ('playback', 'service', 'url', {url_or_none}),
                'title': ('mediaItem', 'title'),
                # With get_all=False the first available of lead/body is used.
                'description': ('mediaItem', ('lead', 'body')),
                'duration': ('mediaItem', 'duration', {int_or_none}),
                'timestamp': ('mediaItem', 'aired_at', {parse_iso8601}),
                'thumbnail': ('mediaItem', 'largeThumbnail', {url_or_none}),
            }, get_all=False),
        }
|
||||
@@ -28,12 +28,24 @@ class MagellanTVIE(InfoExtractor):
|
||||
'tags': ['Investigation', 'True Crime', 'Justice', 'Europe'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.magellantv.com/watch/celebration-nation',
|
||||
'info_dict': {
|
||||
'id': 'celebration-nation',
|
||||
'ext': 'mp4',
|
||||
'tags': ['Art & Culture', 'Human Interest', 'Anthropology', 'China', 'History'],
|
||||
'duration': 2640.0,
|
||||
'title': 'Ancestors',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['reactContext']['video']['detail']
|
||||
data = traverse_obj(self._search_nextjs_data(webpage, video_id), (
|
||||
'props', 'pageProps', 'reactContext',
|
||||
(('video', 'detail'), ('series', 'currentEpisode')), {dict}), get_all=False)
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(data['jwpVideoUrl'], video_id)
|
||||
|
||||
return {
|
||||
|
||||
@@ -8,7 +8,8 @@ from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
traverse_obj
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
@@ -16,7 +17,7 @@ class MedalTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?medal\.tv/games/[^/?#&]+/clips/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://medal.tv/games/valorant/clips/jTBFnLKdLy15K',
|
||||
'md5': '6930f8972914b6b9fdc2bb3918098ba0',
|
||||
'md5': '03e4911fdcf7fce563090705c2e79267',
|
||||
'info_dict': {
|
||||
'id': 'jTBFnLKdLy15K',
|
||||
'ext': 'mp4',
|
||||
@@ -33,8 +34,8 @@ class MedalTVIE(InfoExtractor):
|
||||
'duration': 13,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://medal.tv/games/cod%20cold%20war/clips/2mA60jWAGQCBH',
|
||||
'md5': '3d19d426fe0b2d91c26e412684e66a06',
|
||||
'url': 'https://medal.tv/games/cod-cold-war/clips/2mA60jWAGQCBH',
|
||||
'md5': 'fc7a3e4552ae8993c1c4006db46be447',
|
||||
'info_dict': {
|
||||
'id': '2mA60jWAGQCBH',
|
||||
'ext': 'mp4',
|
||||
@@ -52,7 +53,7 @@ class MedalTVIE(InfoExtractor):
|
||||
'duration': 23,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://medal.tv/games/cod%20cold%20war/clips/2um24TWdty0NA',
|
||||
'url': 'https://medal.tv/games/cod-cold-war/clips/2um24TWdty0NA',
|
||||
'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148',
|
||||
'info_dict': {
|
||||
'id': '2um24TWdty0NA',
|
||||
@@ -81,7 +82,7 @@ class MedalTVIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
webpage = self._download_webpage(update_url_query(url, {'mobilebypass': 'true'}), video_id)
|
||||
|
||||
hydration_data = self._search_json(
|
||||
r'<script[^>]*>[^<]*\bhydrationData\s*=', webpage,
|
||||
|
||||
@@ -177,6 +177,7 @@ class MotherlessIE(InfoExtractor):
|
||||
|
||||
|
||||
class MotherlessPaginatedIE(InfoExtractor):
|
||||
_EXTRA_QUERY = {}
|
||||
_PAGE_SIZE = 60
|
||||
|
||||
def _correct_path(self, url, item_id):
|
||||
@@ -199,7 +200,7 @@ class MotherlessPaginatedIE(InfoExtractor):
|
||||
def get_page(idx):
|
||||
page = idx + 1
|
||||
current_page = webpage if not idx else self._download_webpage(
|
||||
real_url, item_id, note=f'Downloading page {page}', query={'page': page})
|
||||
real_url, item_id, note=f'Downloading page {page}', query={'page': page, **self._EXTRA_QUERY})
|
||||
yield from self._extract_entries(current_page, real_url)
|
||||
|
||||
return self.playlist_result(
|
||||
@@ -213,7 +214,7 @@ class MotherlessGroupIE(MotherlessPaginatedIE):
|
||||
'url': 'http://motherless.com/gv/movie_scenes',
|
||||
'info_dict': {
|
||||
'id': 'movie_scenes',
|
||||
'title': 'Movie Scenes',
|
||||
'title': 'Movie Scenes - Videos - Hot and sexy scenes from "regular" movies... Beautiful actresses fully',
|
||||
},
|
||||
'playlist_mincount': 540,
|
||||
}, {
|
||||
@@ -244,7 +245,7 @@ class MotherlessGalleryIE(MotherlessPaginatedIE):
|
||||
'id': '338999F',
|
||||
'title': 'Random',
|
||||
},
|
||||
'playlist_mincount': 190,
|
||||
'playlist_mincount': 171,
|
||||
}, {
|
||||
'url': 'https://motherless.com/GVABD6213',
|
||||
'info_dict': {
|
||||
@@ -270,3 +271,27 @@ class MotherlessGalleryIE(MotherlessPaginatedIE):
|
||||
|
||||
def _correct_path(self, url, item_id):
|
||||
return urllib.parse.urljoin(url, f'/GV{item_id}')
|
||||
|
||||
|
||||
class MotherlessUploaderIE(MotherlessPaginatedIE):
    # Paginated listing of an uploader's items, matched by /u/<name> URLs.
    _VALID_URL = r'https?://(?:www\.)?motherless\.com/u/(?P<id>\w+)/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://motherless.com/u/Mrgo4hrs2023',
        'info_dict': {
            'id': 'Mrgo4hrs2023',
            'title': "Mrgo4hrs2023's Uploads - Videos",
        },
        'playlist_mincount': 32,
    }, {
        'url': 'https://motherless.com/u/Happy_couple?t=v',
        'info_dict': {
            'id': 'Happy_couple',
            'title': "Happy_couple's Uploads - Videos",
        },
        'playlist_mincount': 8,
    }]

    # Extra query parameters for this extractor; the same `t=v` is also baked
    # into the path returned by _correct_path.
    _EXTRA_QUERY = {'t': 'v'}

    def _correct_path(self, url, item_id):
        """Return the uploader listing URL (`/u/<item_id>?t=v`) on the same host as *url*."""
        return urllib.parse.urljoin(url, f'/u/{item_id}?t=v')
|
||||
|
||||
171
yt_dlp/extractor/mx3.py
Normal file
171
yt_dlp/extractor/mx3.py
Normal file
@@ -0,0 +1,171 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
try_call,
|
||||
url_or_none,
|
||||
urlhandle_detect_ext,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class Mx3BaseIE(InfoExtractor):
    # Shared implementation for the mx3 track extractors. Subclasses supply
    # `_DOMAIN` and build `_VALID_URL` from `_VALID_URL_TMPL`.
    _VALID_URL_TMPL = r'https?://(?:www\.)?%s/t/(?P<id>\w+)'
    # Candidate formats: 'url' is the path suffix appended to
    # https://<_DOMAIN>/tracks/<track_id>/ and probed in _extract_formats.
    _FORMATS = [{
        'url': 'player_asset',
        'format_id': 'default',
        'quality': 0,
    }, {
        'url': 'player_asset?quality=hd',
        'format_id': 'hd',
        'quality': 1,
    }, {
        'url': 'download',
        'format_id': 'download',
        'quality': 2,
    }, {
        'url': 'player_asset?quality=source',
        'format_id': 'source',
        'quality': 2,
    }]

    def _extract_formats(self, track_id):
        """Probe every candidate in `_FORMATS` with a HEAD request and return those answering 200."""
        formats = []
        for fmt in self._FORMATS:
            format_url = f'https://{self._DOMAIN}/tracks/{track_id}/{fmt["url"]}'
            # A 404 is an expected answer and other failures are non-fatal, so
            # an unavailable candidate is simply left out.
            urlh = self._request_webpage(
                HEADRequest(format_url), track_id, fatal=False, expected_status=404,
                note=f'Checking for format {fmt["format_id"]}')
            if urlh and urlh.status == 200:
                formats.append({
                    **fmt,
                    'url': format_url,
                    'ext': urlhandle_detect_ext(urlh),
                    'filesize': int_or_none(urlh.headers.get('Content-Length')),
                })
        return formats

    def _real_extract(self, url):
        """Combine fields scraped from the track page with the track's `.json` metadata."""
        track_id = self._match_id(url)
        webpage = self._download_webpage(url, track_id)
        more_info = get_element_by_class('single-more-info', webpage)
        # The JSON metadata is optional (fatal=False); the fields read from it
        # below are then simply absent.
        data = self._download_json(f'https://{self._DOMAIN}/t/{track_id}.json', track_id, fatal=False)

        def get_info_field(name):
            # Value of the <dd> that follows the <dt> labelled `name` inside the
            # 'single-more-info' element, or None when there is no match.
            return self._html_search_regex(
                rf'<dt[^>]*>\s*{name}\s*</dt>\s*<dd[^>]*>(.*?)</dd>',
                more_info, name, default=None, flags=re.DOTALL)

        return {
            'id': track_id,
            'formats': self._extract_formats(track_id),
            'genre': self._html_search_regex(
                r'<div\b[^>]+class="single-band-genre"[^>]*>([^<]+)</div>', webpage, 'genre', default=None),
            'release_year': int_or_none(get_info_field('Year of creation')),
            'description': get_info_field('Description'),
            # NOTE(review): `list` is passed as a second callable, presumably
            # the fallback that yields [] when the 'Tag' field is missing
            # (subclass tests expect `'tags': []`) - confirm against try_call.
            'tags': try_call(lambda: get_info_field('Tag').split(', '), list),
            **traverse_obj(data, {
                'title': ('title', {str}),
                'artist': (('performer_name', 'artist'), {str}),
                'album_artist': ('artist', {str}),
                'composer': ('composer_name', {str}),
                'thumbnail': (('picture_url_xlarge', 'picture_url'), {url_or_none}),
            }, get_all=False),
        }
|
||||
|
||||
|
||||
class Mx3IE(Mx3BaseIE):
    # Track pages on mx3.ch; all extraction logic lives in Mx3BaseIE.
    _DOMAIN = 'mx3.ch'
    _VALID_URL = Mx3BaseIE._VALID_URL_TMPL % re.escape(_DOMAIN)
    _TESTS = [{
        'url': 'https://mx3.ch/t/1Cru',
        'md5': '7ba09e9826b4447d4e1ce9d69e0e295f',
        'info_dict': {
            'id': '1Cru',
            'ext': 'wav',
            'artist': 'Godina',
            'album_artist': 'Tortue Tortue',
            'composer': 'Olivier Godinat',
            'genre': 'Rock',
            'thumbnail': 'https://mx3.ch/pictures/mx3/file/0101/4643/square_xlarge/1-s-envoler-1.jpg?1630272813',
            'title': "S'envoler",
            'release_year': 2021,
            'tags': [],
        }
    }, {
        'url': 'https://mx3.ch/t/1LIY',
        'md5': '48293cb908342547827f963a5a2e9118',
        'info_dict': {
            'id': '1LIY',
            'ext': 'mov',
            'artist': 'Tania Kimfumu',
            'album_artist': 'The Broots',
            'composer': 'Emmanuel Diserens',
            'genre': 'Electro',
            'thumbnail': 'https://mx3.ch/pictures/mx3/file/0110/0003/video_xlarge/frame_0000.png?1686963670',
            'title': 'The Broots-Larytta remix "Begging For Help"',
            'release_year': 2023,
            'tags': ['the broots', 'cassata records', 'larytta'],
            'description': '"Begging for Help" Larytta Remix Official Video\nRealized By Kali Donkilie in 2023',
        }
    }, {
        'url': 'https://mx3.ch/t/1C6E',
        'md5': '1afcd578493ddb8e5008e94bb6d97e25',
        'info_dict': {
            'id': '1C6E',
            'ext': 'wav',
            'artist': 'Alien Bubblegum',
            'album_artist': 'Alien Bubblegum',
            'composer': 'Alien Bubblegum',
            'genre': 'Punk',
            'thumbnail': 'https://mx3.ch/pictures/mx3/file/0101/1551/square_xlarge/pandora-s-box-cover-with-title.png?1627054733',
            'title': 'Wide Awake',
            'release_year': 2021,
            'tags': ['alien bubblegum', 'bubblegum', 'alien', 'pop punk', 'poppunk'],
        }
    }]
|
||||
|
||||
|
||||
class Mx3NeoIE(Mx3BaseIE):
    # Track pages on neo.mx3.ch; all extraction logic lives in Mx3BaseIE.
    _DOMAIN = 'neo.mx3.ch'
    _VALID_URL = Mx3BaseIE._VALID_URL_TMPL % re.escape(_DOMAIN)
    _TESTS = [{
        'url': 'https://neo.mx3.ch/t/1hpd',
        'md5': '6d9986bbae5cac3296ec8813bf965eb2',
        'info_dict': {
            'id': '1hpd',
            'ext': 'wav',
            'artist': 'Baptiste Lopez',
            'album_artist': 'Kammerorchester Basel',
            'composer': 'Jannik Giger',
            'genre': 'Composition, Orchestra',
            'title': 'Troisième œil. Für Kammerorchester (2023)',
            'thumbnail': 'https://neo.mx3.ch/pictures/neo/file/0000/0241/square_xlarge/kammerorchester-basel-group-photo-2_c_-lukasz-rajchert.jpg?1560341252',
            'release_year': 2023,
            'tags': [],
        }
    }]
|
||||
|
||||
|
||||
class Mx3VolksmusikIE(Mx3BaseIE):
    # Track pages on volksmusik.mx3.ch; all extraction logic lives in Mx3BaseIE.
    _DOMAIN = 'volksmusik.mx3.ch'
    _VALID_URL = Mx3BaseIE._VALID_URL_TMPL % re.escape(_DOMAIN)
    _TESTS = [{
        'url': 'https://volksmusik.mx3.ch/t/Zx',
        'md5': 'dd967a7b0c1ef898f3e072cf9c2eae3c',
        'info_dict': {
            'id': 'Zx',
            'ext': 'mp3',
            'artist': 'Ländlerkapelle GrischArt',
            'album_artist': 'Ländlerkapelle GrischArt',
            'composer': 'Urs Glauser',
            'genre': 'Instrumental, Graubünden',
            'title': 'Chämilouf',
            'thumbnail': 'https://volksmusik.mx3.ch/pictures/vxm/file/0000/3815/square_xlarge/grischart1.jpg?1450530120',
            'release_year': 2012,
            'tags': [],
        }
    }]
|
||||
@@ -1,6 +1,7 @@
|
||||
import itertools
|
||||
import json
|
||||
|
||||
from .art19 import Art19IE
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
@@ -112,7 +113,8 @@ class NebulaBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class NebulaIE(NebulaBaseIE):
|
||||
_VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[-\w]+)'
|
||||
IE_NAME = 'nebula:video'
|
||||
_VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast',
|
||||
'info_dict': {
|
||||
@@ -236,8 +238,8 @@ class NebulaIE(NebulaBaseIE):
|
||||
|
||||
|
||||
class NebulaClassIE(NebulaBaseIE):
|
||||
IE_NAME = 'nebula:class'
|
||||
_VALID_URL = rf'{_BASE_URL_RE}/(?P<id>[-\w]+)/(?P<ep>\d+)'
|
||||
IE_NAME = 'nebula:media'
|
||||
_VALID_URL = rf'{_BASE_URL_RE}/(?!(?:myshows|library|videos)/)(?P<id>[\w-]+)/(?P<ep>[\w-]+)/?(?:$|[?#])'
|
||||
_TESTS = [{
|
||||
'url': 'https://nebula.tv/copyright-for-fun-and-profit/14',
|
||||
'info_dict': {
|
||||
@@ -253,6 +255,46 @@ class NebulaClassIE(NebulaBaseIE):
|
||||
'title': 'Photos, Sculpture, and Video',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://nebula.tv/extremitiespodcast/pyramiden-the-high-arctic-soviet-ghost-town',
|
||||
'info_dict': {
|
||||
'ext': 'mp3',
|
||||
'id': '018f65f0-0033-4021-8f87-2d132beb19aa',
|
||||
'description': 'md5:05d2b23ab780c955e2511a2b9127acff',
|
||||
'series_id': '335e8159-d663-491a-888f-1732285706ac',
|
||||
'modified_timestamp': 1599091504,
|
||||
'episode_id': '018f65f0-0033-4021-8f87-2d132beb19aa',
|
||||
'series': 'Extremities',
|
||||
'modified_date': '20200903',
|
||||
'upload_date': '20200902',
|
||||
'title': 'Pyramiden: The High-Arctic Soviet Ghost Town',
|
||||
'release_timestamp': 1571237958,
|
||||
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
|
||||
'duration': 1546.05714,
|
||||
'timestamp': 1599085608,
|
||||
'release_date': '20191016',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://nebula.tv/thelayover/the-layover-episode-1',
|
||||
'info_dict': {
|
||||
'ext': 'mp3',
|
||||
'id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0',
|
||||
'episode_number': 1,
|
||||
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
|
||||
'release_date': '20230304',
|
||||
'modified_date': '20230403',
|
||||
'series': 'The Layover',
|
||||
'episode_id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0',
|
||||
'modified_timestamp': 1680554566,
|
||||
'duration': 3130.46401,
|
||||
'release_timestamp': 1677943800,
|
||||
'title': 'The Layover — Episode 1',
|
||||
'series_id': '874303a5-4900-4626-a4b6-2aacac34466a',
|
||||
'upload_date': '20230303',
|
||||
'episode': 'Episode 1',
|
||||
'timestamp': 1677883672,
|
||||
'description': 'md5:002cca89258e3bc7c268d5b8c24ba482',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -268,16 +310,38 @@ class NebulaClassIE(NebulaBaseIE):
|
||||
|
||||
metadata = self._call_api(
|
||||
f'https://content.api.nebula.app/content/{slug}/{episode}/?include=lessons',
|
||||
slug, note='Fetching video metadata')
|
||||
return {
|
||||
**self._extract_video_metadata(metadata),
|
||||
**self._extract_formats(metadata['id'], slug),
|
||||
}
|
||||
slug, note='Fetching class/podcast metadata')
|
||||
content_type = metadata.get('type')
|
||||
if content_type == 'lesson':
|
||||
return {
|
||||
**self._extract_video_metadata(metadata),
|
||||
**self._extract_formats(metadata['id'], slug),
|
||||
}
|
||||
elif content_type == 'podcast_episode':
|
||||
episode_url = metadata['episode_url']
|
||||
if not episode_url and metadata.get('premium'):
|
||||
self.raise_login_required()
|
||||
|
||||
if Art19IE.suitable(episode_url):
|
||||
return self.url_result(episode_url, Art19IE)
|
||||
return traverse_obj(metadata, {
|
||||
'id': ('id', {str}),
|
||||
'url': ('episode_url', {url_or_none}),
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'timestamp': ('published_at', {parse_iso8601}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'channel_id': ('channel_id', {str}),
|
||||
'chnanel': ('channel_title', {str}),
|
||||
'thumbnail': ('assets', 'regular', {url_or_none}),
|
||||
})
|
||||
|
||||
raise ExtractorError(f'Unexpected content type {content_type!r}')
|
||||
|
||||
|
||||
class NebulaSubscriptionsIE(NebulaBaseIE):
|
||||
IE_NAME = 'nebula:subscriptions'
|
||||
_VALID_URL = rf'{_BASE_URL_RE}/(?P<id>myshows|library/latest-videos)'
|
||||
_VALID_URL = rf'{_BASE_URL_RE}/(?P<id>myshows|library/latest-videos)/?(?:$|[?#])'
|
||||
_TESTS = [{
|
||||
'url': 'https://nebula.tv/myshows',
|
||||
'playlist_mincount': 1,
|
||||
@@ -310,7 +374,7 @@ class NebulaSubscriptionsIE(NebulaBaseIE):
|
||||
|
||||
class NebulaChannelIE(NebulaBaseIE):
|
||||
IE_NAME = 'nebula:channel'
|
||||
_VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|library|videos/)(?P<id>[-\w]+)/?(?:$|[?#])'
|
||||
_VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|library|videos)(?P<id>[\w-]+)/?(?:$|[?#])'
|
||||
_TESTS = [{
|
||||
'url': 'https://nebula.tv/tom-scott-presents-money',
|
||||
'info_dict': {
|
||||
@@ -343,6 +407,14 @@ class NebulaChannelIE(NebulaBaseIE):
|
||||
'description': 'md5:6690248223eed044a9f11cd5a24f9742',
|
||||
},
|
||||
'playlist_count': 23,
|
||||
}, {
|
||||
'url': 'https://nebula.tv/trussissuespodcast',
|
||||
'info_dict': {
|
||||
'id': 'trussissuespodcast',
|
||||
'title': 'The TLDR News Podcast',
|
||||
'description': 'md5:a08c4483bc0b705881d3e0199e721385',
|
||||
},
|
||||
'playlist_mincount': 80,
|
||||
}]
|
||||
|
||||
def _generate_playlist_entries(self, collection_id, collection_slug):
|
||||
@@ -365,6 +437,17 @@ class NebulaChannelIE(NebulaBaseIE):
|
||||
lesson.get('share_url') or f'https://nebula.tv/{metadata["class_slug"]}/{metadata["slug"]}',
|
||||
{'id': lesson['id']}), NebulaClassIE, url_transparent=True, **metadata)
|
||||
|
||||
def _generate_podcast_entries(self, collection_id, collection_slug):
    """Yield a ``url_result`` for each episode of a podcast channel.

    Pages are fetched through ``self._call_api`` starting from the channel's
    episode listing and continuing with each response's ``next`` link until
    a page has none. Results whose ``share_url`` does not pass
    ``url_or_none`` are skipped.
    """
    page_url = f'https://content.api.nebula.app/podcast_channels/{collection_id}/podcast_episodes/?ordering=-published_at&premium=true'
    page_num = 0
    while page_url:
        page_num += 1
        page = self._call_api(page_url, collection_slug, note=f'Retrieving podcast page {page_num}')

        valid_episodes = traverse_obj(page, ('results', lambda _, v: url_or_none(v['share_url'])))
        yield from (self.url_result(item['share_url'], NebulaClassIE) for item in valid_episodes)

        # A missing/empty 'next' link marks the last page and ends the loop
        page_url = page.get('next')
|
||||
|
||||
def _real_extract(self, url):
|
||||
collection_slug = self._match_id(url)
|
||||
channel = self._call_api(
|
||||
@@ -373,6 +456,8 @@ class NebulaChannelIE(NebulaBaseIE):
|
||||
|
||||
if channel.get('type') == 'class':
|
||||
entries = self._generate_class_entries(channel)
|
||||
elif channel.get('type') == 'podcast_channel':
|
||||
entries = self._generate_podcast_entries(channel['id'], collection_slug)
|
||||
else:
|
||||
entries = self._generate_playlist_entries(channel['id'], collection_slug)
|
||||
|
||||
|
||||
@@ -1,33 +1,38 @@
|
||||
import datetime
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import parse_iso8601, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class NerdCubedFeedIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?nerdcubed\.co\.uk/feed\.json'
|
||||
_VALID_URL = r'https?://(?:www\.)?nerdcubed\.co\.uk/?(?:$|[#?])'
|
||||
_TEST = {
|
||||
'url': 'http://www.nerdcubed.co.uk/feed.json',
|
||||
'url': 'http://www.nerdcubed.co.uk/',
|
||||
'info_dict': {
|
||||
'id': 'nerdcubed-feed',
|
||||
'title': 'nerdcubed.co.uk feed',
|
||||
},
|
||||
'playlist_mincount': 1300,
|
||||
'playlist_mincount': 5500,
|
||||
}
|
||||
|
||||
def _extract_video(self, feed_entry):
    """Build a url-transparent YouTube result for a single feed entry.

    The entry's ``id`` is used as the YouTube video id; the remaining feed
    fields are passed along as metadata on the result.
    """
    youtube_url = f'https://www.youtube.com/watch?v={feed_entry["id"]}'
    metadata = traverse_obj(feed_entry, {
        'id': ('id', {str}),
        'title': ('title', {str}),
        'description': ('description', {str}),
        'timestamp': ('publishedAt', {parse_iso8601}),
        'channel': ('source', 'name', {str}),
        'channel_id': ('source', 'id', {str}),
        'channel_url': ('source', 'url', {str}),
        'thumbnail': ('thumbnail', 'source', {url_or_none}),
    })
    return self.url_result(youtube_url, YoutubeIE, url_transparent=True, **metadata)
|
||||
|
||||
def _real_extract(self, url):
|
||||
feed = self._download_json(url, url, 'Downloading NerdCubed JSON feed')
|
||||
video_id = 'nerdcubed-feed'
|
||||
feed = self._download_json('https://www.nerdcubed.co.uk/_/cdn/videos.json', video_id)
|
||||
|
||||
entries = [{
|
||||
'_type': 'url',
|
||||
'title': feed_entry['title'],
|
||||
'uploader': feed_entry['source']['name'] if feed_entry['source'] else None,
|
||||
'upload_date': datetime.datetime.strptime(feed_entry['date'], '%Y-%m-%d').strftime('%Y%m%d'),
|
||||
'url': 'http://www.youtube.com/watch?v=' + feed_entry['youtube_id'],
|
||||
} for feed_entry in feed]
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'title': 'nerdcubed.co.uk feed',
|
||||
'id': 'nerdcubed-feed',
|
||||
'entries': entries,
|
||||
}
|
||||
return self.playlist_result(
|
||||
map(self._extract_video, traverse_obj(feed, ('videos', lambda _, v: v['id']))),
|
||||
video_id, 'nerdcubed.co.uk feed')
|
||||
|
||||
@@ -3,15 +3,15 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
OnDemandPagedList,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_element_by_id,
|
||||
int_or_none,
|
||||
parse_count,
|
||||
parse_duration,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
OnDemandPagedList,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
@@ -263,19 +263,16 @@ class NewgroundsUserIE(InfoExtractor):
|
||||
def _fetch_page(self, channel_id, url, page):
|
||||
page += 1
|
||||
posts_info = self._download_json(
|
||||
f'{url}/page/{page}', channel_id,
|
||||
f'{url}?page={page}', channel_id,
|
||||
note=f'Downloading page {page}', headers={
|
||||
'Accept': 'application/json, text/javascript, */*; q = 0.01',
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
})
|
||||
sequence = posts_info.get('sequence', [])
|
||||
for year in sequence:
|
||||
posts = try_get(posts_info, lambda x: x['years'][str(year)]['items'])
|
||||
for post in posts:
|
||||
path, media_id = self._search_regex(
|
||||
r'<a[^>]+\bhref=["\'][^"\']+((?:portal/view|audio/listen)/(\d+))[^>]+>',
|
||||
post, 'url', group=(1, 2))
|
||||
yield self.url_result(f'https://www.newgrounds.com/{path}', NewgroundsIE.ie_key(), media_id)
|
||||
for post in traverse_obj(posts_info, ('items', ..., ..., {str})):
|
||||
path, media_id = self._search_regex(
|
||||
r'<a[^>]+\bhref=["\'][^"\']+((?:portal/view|audio/listen)/(\d+))[^>]+>',
|
||||
post, 'url', group=(1, 2))
|
||||
yield self.url_result(f'https://www.newgrounds.com/{path}', NewgroundsIE.ie_key(), media_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
|
||||
@@ -1,10 +1,54 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
merge_dicts,
|
||||
parse_count,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class NFBIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?nfb\.ca/film/(?P<id>[^/?#&]+)'
|
||||
class NFBBaseIE(InfoExtractor):
    """Shared helpers for the nfb.ca / onf.ca extractors."""

    # Matches both the English (nfb.ca) and French (onf.ca) sites; the
    # matched domain name is exposed as the `site` group.
    _VALID_URL_BASE = r'https?://(?:www\.)?(?P<site>nfb|onf)\.ca'
    _GEO_COUNTRIES = ['CA']

    def _extract_ep_data(self, webpage, video_id, fatal=False):
        """Return the JSON array assigned to ``const episodesData`` in the page.

        Returns an empty list when nothing is found and ``fatal`` is false.
        """
        return self._search_json(
            r'const\s+episodesData\s*=', webpage, 'episode data', video_id,
            contains_pattern=r'\[\s*{(?s:.+)}\s*\]', fatal=fatal) or []

    def _extract_ep_info(self, data, video_id, slug=None):
        """Build the info dict for the episode in ``data`` matching ``video_id``.

        The first entry of ``data`` whose ``embed_url`` contains ``video_id``
        supplies the metadata. The title is "<series> - <episode>" built from
        whichever of the two is available, and the episode number is parsed
        from ``slug`` (or from ``video_id`` when no slug is given).
        """
        # traverse_obj returns None when no entry matches (e.g. empty `data`);
        # fall back to an empty dict so the `**info` unpacking below cannot
        # raise TypeError.
        info = traverse_obj(data, (lambda _, v: video_id in v['embed_url'], {
            'description': ('description', {str}),
            'thumbnail': ('thumbnail_url', {url_or_none}),
            'uploader': ('data_layer', 'episodeMaker', {str}),
            'release_year': ('data_layer', 'episodeYear', {int_or_none}),
            'episode': ('data_layer', 'episodeTitle', {str}),
            'season': ('data_layer', 'seasonTitle', {str}),
            # The season number is parsed out of the season title itself
            'season_number': ('data_layer', 'seasonTitle', {parse_count}),
            'series': ('data_layer', 'seriesTitle', {str}),
        }), get_all=False) or {}

        return {
            **info,
            'id': video_id,
            'title': join_nonempty('series', 'episode', from_dict=info, delim=' - '),
            'episode_number': int_or_none(self._search_regex(
                r'[/-]e(?:pisode)?-?(\d+)(?:[/-]|$)', slug or video_id, 'episode number', default=None)),
        }
|
||||
|
||||
|
||||
class NFBIE(NFBBaseIE):
|
||||
IE_NAME = 'nfb'
|
||||
IE_DESC = 'nfb.ca and onf.ca films and episodes'
|
||||
_VALID_URL = [
|
||||
rf'{NFBBaseIE._VALID_URL_BASE}/(?P<type>film)/(?P<id>[^/?#&]+)',
|
||||
rf'{NFBBaseIE._VALID_URL_BASE}/(?P<type>series?)/(?P<id>[^/?#&]+/s(?:ea|ai)son\d+/episode\d+)',
|
||||
]
|
||||
_TESTS = [{
|
||||
'note': 'NFB film',
|
||||
'url': 'https://www.nfb.ca/film/trafficopter/',
|
||||
'info_dict': {
|
||||
'id': 'trafficopter',
|
||||
@@ -14,29 +58,192 @@ class NFBIE(InfoExtractor):
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Barrie Howells',
|
||||
'release_year': 1972,
|
||||
'duration': 600.0,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'ONF film',
|
||||
'url': 'https://www.onf.ca/film/mal-du-siecle/',
|
||||
'info_dict': {
|
||||
'id': 'mal-du-siecle',
|
||||
'ext': 'mp4',
|
||||
'title': 'Le mal du siècle',
|
||||
'description': 'md5:1abf774d77569ebe603419f2d344102b',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Catherine Lepage',
|
||||
'release_year': 2019,
|
||||
'duration': 300.0,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'NFB episode with English title',
|
||||
'url': 'https://www.nfb.ca/series/true-north-inside-the-rise-of-toronto-basketball/season1/episode9/',
|
||||
'info_dict': {
|
||||
'id': 'true-north-episode9-true-north-finale-making-it',
|
||||
'ext': 'mp4',
|
||||
'title': 'True North: Inside the Rise of Toronto Basketball - Finale: Making It',
|
||||
'description': 'We catch up with each player in the midst of their journey as they reflect on their road ahead.',
|
||||
'series': 'True North: Inside the Rise of Toronto Basketball',
|
||||
'release_year': 2018,
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'episode': 'Finale: Making It',
|
||||
'episode_number': 9,
|
||||
'uploader': 'Ryan Sidhoo',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'ONF episode with French title',
|
||||
'url': 'https://www.onf.ca/serie/direction-nord-la-montee-du-basketball-a-toronto/saison1/episode9/',
|
||||
'info_dict': {
|
||||
'id': 'direction-nord-episode-9',
|
||||
'ext': 'mp4',
|
||||
'title': 'Direction nord – La montée du basketball à Toronto - Finale : Réussir',
|
||||
'description': 'md5:349a57419b71432b97bf6083d92b029d',
|
||||
'series': 'Direction nord – La montée du basketball à Toronto',
|
||||
'release_year': 2018,
|
||||
'season': 'Saison 1',
|
||||
'season_number': 1,
|
||||
'episode': 'Finale : Réussir',
|
||||
'episode_number': 9,
|
||||
'uploader': 'Ryan Sidhoo',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'NFB episode with French title (needs geo-bypass)',
|
||||
'url': 'https://www.nfb.ca/series/etoile-du-nord/saison1/episode1/',
|
||||
'info_dict': {
|
||||
'id': 'etoile-du-nord-episode-1-lobservation',
|
||||
'ext': 'mp4',
|
||||
'title': 'Étoile du Nord - L\'observation',
|
||||
'description': 'md5:161a4617260dee3de70f509b2c9dd21b',
|
||||
'series': 'Étoile du Nord',
|
||||
'release_year': 2023,
|
||||
'season': 'Saison 1',
|
||||
'season_number': 1,
|
||||
'episode': 'L\'observation',
|
||||
'episode_number': 1,
|
||||
'uploader': 'Patrick Bossé',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'ONF episode with English title (needs geo-bypass)',
|
||||
'url': 'https://www.onf.ca/serie/north-star/season1/episode1/',
|
||||
'info_dict': {
|
||||
'id': 'north-star-episode-1-observation',
|
||||
'ext': 'mp4',
|
||||
'title': 'North Star - Observation',
|
||||
'description': 'md5:c727f370839d8a817392b9e3f23655c7',
|
||||
'series': 'North Star',
|
||||
'release_year': 2023,
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'episode': 'Observation',
|
||||
'episode_number': 1,
|
||||
'uploader': 'Patrick Bossé',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'NFB episode with /film/ URL and English title (needs geo-bypass)',
|
||||
'url': 'https://www.nfb.ca/film/north-star-episode-1-observation/',
|
||||
'info_dict': {
|
||||
'id': 'north-star-episode-1-observation',
|
||||
'ext': 'mp4',
|
||||
'title': 'North Star - Observation',
|
||||
'description': 'md5:c727f370839d8a817392b9e3f23655c7',
|
||||
'series': 'North Star',
|
||||
'release_year': 2023,
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'episode': 'Observation',
|
||||
'episode_number': 1,
|
||||
'uploader': 'Patrick Bossé',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'ONF episode with /film/ URL and French title (needs geo-bypass)',
|
||||
'url': 'https://www.onf.ca/film/etoile-du-nord-episode-1-lobservation/',
|
||||
'info_dict': {
|
||||
'id': 'etoile-du-nord-episode-1-lobservation',
|
||||
'ext': 'mp4',
|
||||
'title': 'Étoile du Nord - L\'observation',
|
||||
'description': 'md5:161a4617260dee3de70f509b2c9dd21b',
|
||||
'series': 'Étoile du Nord',
|
||||
'release_year': 2023,
|
||||
'season': 'Saison 1',
|
||||
'season_number': 1,
|
||||
'episode': 'L\'observation',
|
||||
'episode_number': 1,
|
||||
'uploader': 'Patrick Bossé',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'Season 2 episode w/o episode num in id, extract from json ld',
|
||||
'url': 'https://www.onf.ca/film/liste-des-choses-qui-existent-saison-2-ours',
|
||||
'info_dict': {
|
||||
'id': 'liste-des-choses-qui-existent-saison-2-ours',
|
||||
'ext': 'mp4',
|
||||
'title': 'La liste des choses qui existent - L\'ours en peluche',
|
||||
'description': 'md5:d5e8d8fc5f3a7385a9cf0f509b37e28a',
|
||||
'series': 'La liste des choses qui existent',
|
||||
'release_year': 2022,
|
||||
'season': 'Saison 2',
|
||||
'season_number': 2,
|
||||
'episode': 'L\'ours en peluche',
|
||||
'episode_number': 12,
|
||||
'uploader': 'Francis Papillon',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'NFB film /embed/player/ page',
|
||||
'url': 'https://www.nfb.ca/film/afterlife/embed/player/',
|
||||
'info_dict': {
|
||||
'id': 'afterlife',
|
||||
'ext': 'mp4',
|
||||
'title': 'Afterlife',
|
||||
'description': 'md5:84951394f594f1fb1e62d9c43242fdf5',
|
||||
'release_year': 1978,
|
||||
'duration': 420.0,
|
||||
'uploader': 'Ishu Patel',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
site, type_, slug = self._match_valid_url(url).group('site', 'type', 'id')
|
||||
# Need to construct the URL since we match /embed/player/ URLs as well
|
||||
webpage, urlh = self._download_webpage_handle(f'https://www.{site}.ca/{type_}/{slug}/', slug)
|
||||
# type_ can change from film to serie(s) after redirect; new slug may have episode number
|
||||
type_, slug = self._match_valid_url(urlh.url).group('type', 'id')
|
||||
|
||||
webpage = self._download_webpage('https://www.nfb.ca/film/%s/' % video_id, video_id)
|
||||
embed_url = urljoin(f'https://www.{site}.ca', self._html_search_regex(
|
||||
r'<[^>]+\bid=["\']player-iframe["\'][^>]*\bsrc=["\']([^"\']+)', webpage, 'embed url'))
|
||||
video_id = self._match_id(embed_url) # embed url has unique slug
|
||||
player = self._download_webpage(embed_url, video_id, 'Downloading player page')
|
||||
if 'MESSAGE_GEOBLOCKED' in player:
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||
|
||||
iframe = self._html_search_regex(
|
||||
r'<[^>]+\bid=["\']player-iframe["\'][^>]*src=["\']([^"\']+)',
|
||||
webpage, 'iframe', default=None, fatal=True)
|
||||
if iframe.startswith('/'):
|
||||
iframe = f'https://www.nfb.ca{iframe}'
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
self._html_search_regex(r'source:\s*\'([^\']+)', player, 'm3u8 url'),
|
||||
video_id, 'mp4', m3u8_id='hls')
|
||||
|
||||
player = self._download_webpage(iframe, video_id)
|
||||
if dv_source := self._html_search_regex(r'dvSource:\s*\'([^\']+)', player, 'dv', default=None):
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
dv_source, video_id, 'mp4', m3u8_id='dv', preference=-2, fatal=False)
|
||||
for fmt in fmts:
|
||||
fmt['format_note'] = 'described video'
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
source = self._html_search_regex(
|
||||
r'source:\s*\'([^\']+)',
|
||||
player, 'source', default=None, fatal=True)
|
||||
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(source, video_id, ext='mp4')
|
||||
|
||||
return {
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': self._html_search_regex(
|
||||
r'<[^>]+\bid=["\']titleHeader["\'][^>]*>\s*<h1[^>]*>\s*([^<]+?)\s*</h1>',
|
||||
@@ -45,14 +252,49 @@ class NFBIE(InfoExtractor):
|
||||
r'<[^>]+\bid=["\']tabSynopsis["\'][^>]*>\s*<p[^>]*>\s*([^<]+)',
|
||||
webpage, 'description', default=None),
|
||||
'thumbnail': self._html_search_regex(
|
||||
r'poster:\s*\'([^\']+)',
|
||||
player, 'thumbnail', default=None),
|
||||
r'poster:\s*\'([^\']+)', player, 'thumbnail', default=None),
|
||||
'uploader': self._html_search_regex(
|
||||
r'<[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)',
|
||||
webpage, 'uploader', default=None),
|
||||
r'<[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)', webpage, 'uploader', default=None),
|
||||
'release_year': int_or_none(self._html_search_regex(
|
||||
r'<[^>]+\bitemprop=["\']datePublished["\'][^>]*>([^<]+)',
|
||||
webpage, 'release_year', default=None)),
|
||||
} if type_ == 'film' else self._extract_ep_info(self._extract_ep_data(webpage, video_id, slug), video_id)
|
||||
|
||||
return merge_dicts({
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
}, info, self._search_json_ld(webpage, video_id, default={}))
|
||||
|
||||
|
||||
class NFBSeriesIE(NFBBaseIE):
    """Playlist extractor for series pages on nfb.ca and onf.ca."""

    IE_NAME = 'nfb:series'
    IE_DESC = 'nfb.ca and onf.ca series'
    _VALID_URL = rf'{NFBBaseIE._VALID_URL_BASE}/(?P<type>series?)/(?P<id>[^/?#&]+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.nfb.ca/series/true-north-inside-the-rise-of-toronto-basketball/',
        'playlist_mincount': 9,
        'info_dict': {
            'id': 'true-north-inside-the-rise-of-toronto-basketball',
        },
    }, {
        'url': 'https://www.onf.ca/serie/la-liste-des-choses-qui-existent-serie/',
        'playlist_mincount': 26,
        'info_dict': {
            'id': 'la-liste-des-choses-qui-existent-serie',
        },
    }]

    def _entries(self, episodes):
        """Yield an NFBIE url_result for each episode NFBIE can handle."""
        playable = traverse_obj(episodes, lambda _, v: NFBIE.suitable(v['embed_url']))
        for ep in playable:
            match = NFBIE._match_valid_url(ep['embed_url'])
            ep_info = self._extract_ep_info([ep], match.group('id'))
            yield self.url_result(match.group(0), NFBIE, **ep_info)

    def _real_extract(self, url):
        site, type_, series_id = self._match_valid_url(url).group('site', 'type', 'id')

        # The 'serie' path type uses 'saison' in episode URLs; 'series' uses 'season'
        if type_ == 'serie':
            season_path = 'saison'
        else:
            season_path = 'season'

        # The episode data for the playlist is read from the first episode's page
        first_episode_url = f'https://www.{site}.ca/{type_}/{series_id}/{season_path}1/episode1'
        webpage = self._download_webpage(first_episode_url, series_id)
        episodes = self._extract_ep_data(webpage, series_id, fatal=True)

        return self.playlist_result(self._entries(episodes), series_id)
|
||||
|
||||
@@ -9,6 +9,7 @@ from ..utils import (
|
||||
join_nonempty,
|
||||
parse_duration,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
@@ -473,22 +474,21 @@ class NhkRadiruIE(InfoExtractor):
|
||||
IE_DESC = 'NHK らじる (Radiru/Rajiru)'
|
||||
_VALID_URL = r'https?://www\.nhk\.or\.jp/radio/(?:player/ondemand|ondemand/detail)\.html\?p=(?P<site>[\da-zA-Z]+)_(?P<corner>[\da-zA-Z]+)(?:_(?P<headline>[\da-zA-Z]+))?'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_3853544',
|
||||
'skip': 'Episode expired on 2023-04-16',
|
||||
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_3926210',
|
||||
'skip': 'Episode expired on 2024-02-24',
|
||||
'info_dict': {
|
||||
'channel': 'NHK-FM',
|
||||
'uploader': 'NHK-FM',
|
||||
'description': 'md5:94b08bdeadde81a97df4ec882acce3e9',
|
||||
'title': 'ジャズ・トゥナイト シリーズJAZZジャイアンツ 56 ジョニー・ホッジス',
|
||||
'id': '0449_01_3926210',
|
||||
'ext': 'm4a',
|
||||
'id': '0449_01_3853544',
|
||||
'series': 'ジャズ・トゥナイト',
|
||||
'uploader': 'NHK-FM',
|
||||
'channel': 'NHK-FM',
|
||||
'thumbnail': 'https://www.nhk.or.jp/prog/img/449/g449.jpg',
|
||||
'timestamp': 1680969600,
|
||||
'title': 'ジャズ・トゥナイト NEWジャズ特集',
|
||||
'upload_date': '20230408',
|
||||
'release_timestamp': 1680962400,
|
||||
'release_date': '20230408',
|
||||
'was_live': True,
|
||||
'release_date': '20240217',
|
||||
'description': 'md5:a456ee8e5e59e6dd2a7d32e62386e811',
|
||||
'timestamp': 1708185600,
|
||||
'release_timestamp': 1708178400,
|
||||
'upload_date': '20240217',
|
||||
},
|
||||
}, {
|
||||
# playlist, airs every weekday so it should _hopefully_ be okay forever
|
||||
@@ -519,7 +519,8 @@ class NhkRadiruIE(InfoExtractor):
|
||||
'series': 'らじる文庫 by ラジオ深夜便 ',
|
||||
'release_timestamp': 1481126700,
|
||||
'upload_date': '20211101',
|
||||
}
|
||||
},
|
||||
'expected_warnings': ['Unable to download JSON metadata', 'Failed to get extended description'],
|
||||
}, {
|
||||
# news
|
||||
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F261_01_3855109',
|
||||
@@ -539,9 +540,28 @@ class NhkRadiruIE(InfoExtractor):
|
||||
},
|
||||
}]
|
||||
|
||||
_API_URL_TMPL = None
|
||||
|
||||
def _extract_extended_description(self, episode_id, episode):
    """Return the extended description for an episode, if obtainable.

    The detail URL is built from the class-level ``_API_URL_TMPL`` using the
    ``service,area`` pair in ``aa_vinfo2`` and the value of ``aa_vinfo3``.
    The description joins the ``subtitle``, ``content``, ``act`` and
    ``music`` fields of the detail record with blank lines.

    Returns None when the service or the detail URL cannot be determined,
    and may return an empty string when the detail record has none of the
    fields; callers should treat any falsy result as "no description".
    """
    # traverse_obj yields None when `episode` is None or `aa_vinfo2` is
    # missing / not a string; fall back to empty parts so the unpacking
    # cannot raise TypeError.
    service, _, area = traverse_obj(
        episode, ('aa_vinfo2', {str}, {lambda x: (x or '').partition(',')})) or ('', '', '')
    if not service:
        # Without a service there is no record to look up in the detail response
        return
    aa_vinfo3 = traverse_obj(episode, ('aa_vinfo3', {str}))
    # try_call swallows the failure when _API_URL_TMPL is unset or malformed
    detail_url = try_call(
        lambda: self._API_URL_TMPL.format(service=service, area=area, dateid=aa_vinfo3))
    if not detail_url:
        return

    full_meta = traverse_obj(
        self._download_json(detail_url, episode_id, 'Downloading extended metadata', fatal=False),
        ('list', service, 0, {dict})) or {}
    return join_nonempty('subtitle', 'content', 'act', 'music', delim='\n\n', from_dict=full_meta)
|
||||
|
||||
def _extract_episode_info(self, headline, programme_id, series_meta):
|
||||
episode_id = f'{programme_id}_{headline["headline_id"]}'
|
||||
episode = traverse_obj(headline, ('file_list', 0, {dict}))
|
||||
description = self._extract_extended_description(episode_id, episode)
|
||||
if not description:
|
||||
self.report_warning('Failed to get extended description, falling back to summary')
|
||||
description = traverse_obj(episode, ('file_title_sub', {str}))
|
||||
|
||||
return {
|
||||
**series_meta,
|
||||
@@ -551,14 +571,21 @@ class NhkRadiruIE(InfoExtractor):
|
||||
'was_live': True,
|
||||
'series': series_meta.get('title'),
|
||||
'thumbnail': url_or_none(headline.get('headline_image')) or series_meta.get('thumbnail'),
|
||||
'description': description,
|
||||
**traverse_obj(episode, {
|
||||
'title': 'file_title',
|
||||
'description': 'file_title_sub',
|
||||
'timestamp': ('open_time', {unified_timestamp}),
|
||||
'release_timestamp': ('aa_vinfo4', {lambda x: x.split('_')[0]}, {unified_timestamp}),
|
||||
}),
|
||||
}
|
||||
|
||||
def _real_initialize(self):
    """Load the program-detail URL template once and cache it on the class.

    Nothing is fetched when the template is already set. A failed download
    or a config without the expected element leaves the template as None.
    """
    if not self._API_URL_TMPL:
        config_url = 'https://www.nhk.or.jp/radio/config/config_web.xml'
        api_config = self._download_xml(config_url, None, 'Downloading API config', fatal=False)
        # Stored on the class (not the instance) so other instances can reuse it
        NhkRadiruIE._API_URL_TMPL = try_call(lambda: f'https:{api_config.find(".//url_program_detail").text}')
|
||||
|
||||
def _real_extract(self, url):
|
||||
site_id, corner_id, headline_id = self._match_valid_url(url).group('site', 'corner', 'headline')
|
||||
programme_id = f'{site_id}_{corner_id}'
|
||||
|
||||
@@ -172,9 +172,6 @@ class NiconicoIE(InfoExtractor):
|
||||
|
||||
_VALID_URL = r'https?://(?:(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch|nico\.ms)/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
||||
_NETRC_MACHINE = 'niconico'
|
||||
_COMMENT_API_ENDPOINTS = (
|
||||
'https://nvcomment.nicovideo.jp/legacy/api.json',
|
||||
'https://nmsg.nicovideo.jp/api.json',)
|
||||
_API_HEADERS = {
|
||||
'X-Frontend-ID': '6',
|
||||
'X-Frontend-Version': '0',
|
||||
@@ -470,93 +467,16 @@ class NiconicoIE(InfoExtractor):
|
||||
parse_duration(self._html_search_meta('video:duration', webpage, 'video duration', default=None))
|
||||
or get_video_info('duration')),
|
||||
'webpage_url': url_or_none(url) or f'https://www.nicovideo.jp/watch/{video_id}',
|
||||
'subtitles': self.extract_subtitles(video_id, api_data, session_api_data),
|
||||
'subtitles': self.extract_subtitles(video_id, api_data),
|
||||
}
|
||||
|
||||
def _get_subtitles(self, video_id, api_data, session_api_data):
|
||||
comment_user_key = traverse_obj(api_data, ('comment', 'keys', 'userKey'))
|
||||
user_id_str = session_api_data.get('serviceUserId')
|
||||
|
||||
thread_ids = traverse_obj(api_data, ('comment', 'threads', lambda _, v: v['isActive']))
|
||||
legacy_danmaku = self._extract_legacy_comments(video_id, thread_ids, user_id_str, comment_user_key) or []
|
||||
|
||||
new_comments = traverse_obj(api_data, ('comment', 'nvComment'))
|
||||
new_danmaku = self._extract_new_comments(
|
||||
new_comments.get('server'), video_id,
|
||||
new_comments.get('params'), new_comments.get('threadKey'))
|
||||
|
||||
if not legacy_danmaku and not new_danmaku:
|
||||
self.report_warning(f'Failed to get comments. {bug_reports_message()}')
|
||||
return
|
||||
|
||||
return {
|
||||
'comments': [{
|
||||
'ext': 'json',
|
||||
'data': json.dumps(legacy_danmaku + new_danmaku),
|
||||
}],
|
||||
}
|
||||
|
||||
def _extract_legacy_comments(self, video_id, threads, user_id, user_key):
|
||||
auth_data = {
|
||||
'user_id': user_id,
|
||||
'userkey': user_key,
|
||||
} if user_id and user_key else {'user_id': ''}
|
||||
|
||||
api_url = traverse_obj(threads, (..., 'server'), get_all=False)
|
||||
|
||||
# Request Start
|
||||
post_data = [{'ping': {'content': 'rs:0'}}]
|
||||
for i, thread in enumerate(threads):
|
||||
thread_id = thread['id']
|
||||
thread_fork = thread['fork']
|
||||
# Post Start (2N)
|
||||
post_data.append({'ping': {'content': f'ps:{i * 2}'}})
|
||||
post_data.append({'thread': {
|
||||
'fork': thread_fork,
|
||||
'language': 0,
|
||||
'nicoru': 3,
|
||||
'scores': 1,
|
||||
'thread': thread_id,
|
||||
'version': '20090904',
|
||||
'with_global': 1,
|
||||
**auth_data,
|
||||
}})
|
||||
# Post Final (2N)
|
||||
post_data.append({'ping': {'content': f'pf:{i * 2}'}})
|
||||
|
||||
# Post Start (2N+1)
|
||||
post_data.append({'ping': {'content': f'ps:{i * 2 + 1}'}})
|
||||
post_data.append({'thread_leaves': {
|
||||
# format is '<bottom of minute range>-<top of minute range>:<comments per minute>,<total last comments'
|
||||
# unfortunately NND limits (deletes?) comment returns this way, so you're only able to grab the last 1000 per language
|
||||
'content': '0-999999:999999,999999,nicoru:999999',
|
||||
'fork': thread_fork,
|
||||
'language': 0,
|
||||
'nicoru': 3,
|
||||
'scores': 1,
|
||||
'thread': thread_id,
|
||||
**auth_data,
|
||||
}})
|
||||
# Post Final (2N+1)
|
||||
post_data.append({'ping': {'content': f'pf:{i * 2 + 1}'}})
|
||||
# Request Final
|
||||
post_data.append({'ping': {'content': 'rf:0'}})
|
||||
|
||||
return self._download_json(
|
||||
f'{api_url}/api.json', video_id, data=json.dumps(post_data).encode(), fatal=False,
|
||||
headers={
|
||||
'Referer': f'https://www.nicovideo.jp/watch/{video_id}',
|
||||
'Origin': 'https://www.nicovideo.jp',
|
||||
'Content-Type': 'text/plain;charset=UTF-8',
|
||||
},
|
||||
note='Downloading comments', errnote=f'Failed to access endpoint {api_url}')
|
||||
|
||||
def _extract_new_comments(self, endpoint, video_id, params, thread_key):
|
||||
comments = self._download_json(
|
||||
f'{endpoint}/v1/threads', video_id, data=json.dumps({
|
||||
def _get_subtitles(self, video_id, api_data):
|
||||
comments_info = traverse_obj(api_data, ('comment', 'nvComment', {dict})) or {}
|
||||
danmaku = traverse_obj(self._download_json(
|
||||
f'{comments_info.get("server")}/v1/threads', video_id, data=json.dumps({
|
||||
'additionals': {},
|
||||
'params': params,
|
||||
'threadKey': thread_key,
|
||||
'params': comments_info.get('params'),
|
||||
'threadKey': comments_info.get('threadKey'),
|
||||
}).encode(), fatal=False,
|
||||
headers={
|
||||
'Referer': 'https://www.nicovideo.jp/',
|
||||
@@ -566,8 +486,19 @@ class NiconicoIE(InfoExtractor):
|
||||
'x-frontend-id': '6',
|
||||
'x-frontend-version': '0',
|
||||
},
|
||||
note='Downloading comments (new)', errnote='Failed to download comments (new)')
|
||||
return traverse_obj(comments, ('data', 'threads', ..., 'comments', ...))
|
||||
note='Downloading comments', errnote='Failed to download comments'),
|
||||
('data', 'threads', ..., 'comments', ...))
|
||||
|
||||
if not danmaku:
|
||||
self.report_warning(f'Failed to get comments. {bug_reports_message()}')
|
||||
return
|
||||
|
||||
return {
|
||||
'comments': [{
|
||||
'ext': 'json',
|
||||
'data': json.dumps(danmaku),
|
||||
}],
|
||||
}
|
||||
|
||||
|
||||
class NiconicoPlaylistBaseIE(InfoExtractor):
|
||||
|
||||
225
yt_dlp/extractor/ninaprotocol.py
Normal file
225
yt_dlp/extractor/ninaprotocol.py
Normal file
@@ -0,0 +1,225 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none, mimetype2ext, parse_iso8601, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class NinaProtocolIE(InfoExtractor):
    """Extract a ninaprotocol.com release as a playlist with one entry per audio file."""

    _VALID_URL = r'https?://(?:www\.)?ninaprotocol\.com/releases/(?P<id>[^/#?]+)'
    _TESTS = [{
        'url': 'https://www.ninaprotocol.com/releases/3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ',
        'info_dict': {
            'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ',
            'title': 'The Spatulas - March Chant',
            'tags': ['punk', 'postpresentmedium', 'cambridge'],
            'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
            'channel': 'ppm',
            'description': 'md5:bb9f9d39d8f786449cd5d0ff7c5772db',
            'album': 'The Spatulas - March Chant',
            'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
            'timestamp': 1701417610,
            'uploader': 'ppmrecs',
            'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
            'display_id': 'the-spatulas-march-chant',
            'upload_date': '20231201',
            'album_artist': 'Post Present Medium ',
        },
        'playlist': [{
            'info_dict': {
                'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_1',
                'title': 'March Chant In April',
                'track': 'March Chant In April',
                'ext': 'mp3',
                'duration': 152,
                'track_number': 1,
                'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
                'uploader': 'ppmrecs',
                'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
                'timestamp': 1701417610,
                'channel': 'ppm',
                'album': 'The Spatulas - March Chant',
                'tags': ['punk', 'postpresentmedium', 'cambridge'],
                'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
                'upload_date': '20231201',
                'album_artist': 'Post Present Medium ',
            },
        }, {
            'info_dict': {
                'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_2',
                'title': 'Rescue Mission',
                'track': 'Rescue Mission',
                'ext': 'mp3',
                'duration': 212,
                'track_number': 2,
                'album_artist': 'Post Present Medium ',
                'uploader': 'ppmrecs',
                'tags': ['punk', 'postpresentmedium', 'cambridge'],
                'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
                'channel': 'ppm',
                'upload_date': '20231201',
                'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
                'timestamp': 1701417610,
                'album': 'The Spatulas - March Chant',
                'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
            },
        }, {
            'info_dict': {
                'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_3',
                'title': 'Slinger Style',
                'track': 'Slinger Style',
                'ext': 'mp3',
                'duration': 179,
                'track_number': 3,
                'timestamp': 1701417610,
                'upload_date': '20231201',
                'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
                'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
                'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
                'album_artist': 'Post Present Medium ',
                'album': 'The Spatulas - March Chant',
                'tags': ['punk', 'postpresentmedium', 'cambridge'],
                'uploader': 'ppmrecs',
                'channel': 'ppm',
            },
        }, {
            'info_dict': {
                'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_4',
                'title': 'Psychic Signal',
                'track': 'Psychic Signal',
                'ext': 'mp3',
                'duration': 220,
                'track_number': 4,
                'tags': ['punk', 'postpresentmedium', 'cambridge'],
                'upload_date': '20231201',
                'album': 'The Spatulas - March Chant',
                'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
                'timestamp': 1701417610,
                'album_artist': 'Post Present Medium ',
                'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
                'channel': 'ppm',
                'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
                'uploader': 'ppmrecs',
            },
        }, {
            'info_dict': {
                'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_5',
                'title': 'Curvy Color',
                'track': 'Curvy Color',
                'ext': 'mp3',
                'duration': 148,
                'track_number': 5,
                'timestamp': 1701417610,
                'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
                'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
                'album': 'The Spatulas - March Chant',
                'album_artist': 'Post Present Medium ',
                'channel': 'ppm',
                'tags': ['punk', 'postpresentmedium', 'cambridge'],
                'uploader': 'ppmrecs',
                'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
                'upload_date': '20231201',
            },
        }, {
            'info_dict': {
                'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_6',
                'title': 'Caveman Star',
                'track': 'Caveman Star',
                'ext': 'mp3',
                'duration': 121,
                'track_number': 6,
                'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
                'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
                'tags': ['punk', 'postpresentmedium', 'cambridge'],
                'album_artist': 'Post Present Medium ',
                'uploader': 'ppmrecs',
                'timestamp': 1701417610,
                'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
                'album': 'The Spatulas - March Chant',
                'channel': 'ppm',
                'upload_date': '20231201',
            },
        }],
    }, {
        'url': 'https://www.ninaprotocol.com/releases/f-g-s-american-shield',
        'info_dict': {
            'id': '76PZnJwaMgViQHYfA4NYJXds7CmW6vHQKAtQUxGene6J',
            'description': 'md5:63f08d5db558b4b36e1896f317062721',
            'title': 'F.G.S. - American Shield',
            'uploader_id': 'Ej3rozs11wYqFk1Gs6oggGCkGLz8GzBhmJfnUxf6gPci',
            'channel_id': '6JuksCZPXuP16wJ1BUfwuukJzh42C7guhLrFPPkVJfyE',
            'channel': 'tinkscough',
            'tags': [],
            'album_artist': 'F.G.S.',
            'album': 'F.G.S. - American Shield',
            'thumbnail': 'https://www.arweave.net/YJpgImkXLT9SbpFb576KuZ5pm6bdvs452LMs3Rx6lm8',
            'display_id': 'f-g-s-american-shield',
            'uploader': 'flannerysilva',
            'timestamp': 1702395858,
            'upload_date': '20231212',
        },
        'playlist_count': 1,
    }, {
        'url': 'https://www.ninaprotocol.com/releases/time-to-figure-things-out',
        'info_dict': {
            'id': '6Zi1nC5hj6b13NkpxVYwRhFy6mYA7oLBbe9DMrgGDcYh',
            'display_id': 'time-to-figure-things-out',
            'description': 'md5:960202ed01c3134bb8958f1008527e35',
            'timestamp': 1706283607,
            'title': 'DJ STEPDAD - time to figure things out',
            'album_artist': 'DJ STEPDAD',
            'uploader': 'tddvsss',
            'upload_date': '20240126',
            'album': 'time to figure things out',
            'uploader_id': 'AXQNRgTyYsySyAMFDwxzumuGjfmoXshorCesjpquwCBi',
            'thumbnail': 'https://www.arweave.net/O4i8bcKVqJVZvNeHHFp6r8knpFGh9ZwEgbeYacr4nss',
            'tags': [],
        },
        'playlist_count': 4,
    }]

    def _real_extract(self, url):
        """Fetch the release JSON from the Nina API and build a playlist result.

        The URL component may be either a slug or a public key; when the API
        response carries a ``publicKey`` it is used as the playlist id.
        """
        slug_or_key = self._match_id(url)
        api_response = self._download_json(
            f'https://api.ninaprotocol.com/v1/releases/{slug_or_key}', slug_or_key)
        release = api_response['release']
        release_id = release.get('publicKey') or slug_or_key

        # Metadata shared by the playlist itself and by every track entry.
        # With get_all=False each field takes the first matching value, so
        # 'album_artist' tries the hub's display name before the publisher's.
        shared_fields = traverse_obj(release, {
            'album': ('metadata', 'properties', 'title', {str}),
            'album_artist': ((('hub', 'data'), 'publisherAccount'), 'displayName', {str}),
            'timestamp': ('datetime', {parse_iso8601}),
            'thumbnail': ('metadata', 'image', {url_or_none}),
            'uploader': ('publisherAccount', 'handle', {str}),
            'uploader_id': ('publisherAccount', 'publicKey', {str}),
            'channel': ('hub', 'handle', {str}),
            'channel_id': ('hub', 'publicKey', {str}),
        }, get_all=False)
        shared_fields['tags'] = traverse_obj(release, ('metadata', 'properties', 'tags', ..., {str}))

        # Only files with a usable 'uri' become entries; numbering starts at 1.
        playable_files = traverse_obj(release, (
            'metadata', 'properties', 'files', lambda _, v: url_or_none(v['uri'])))
        entries = [{
            'id': f'{release_id}_{position}',
            'url': file_info['uri'],
            **traverse_obj(file_info, {
                'title': ('track_title', {str}),
                'track': ('track_title', {str}),
                'ext': ('type', {mimetype2ext}),
                'track_number': ('track', {int_or_none}),
                'duration': ('duration', {int_or_none}),
            }),
            'vcodec': 'none',
            **shared_fields,
        } for position, file_info in enumerate(playable_files, 1)]

        playlist_fields = traverse_obj(release, {
            'display_id': ('slug', {str}),
            'title': ('metadata', 'name', {str}),
            'description': ('metadata', 'description', {str}),
        })
        return {
            '_type': 'playlist',
            'id': release_id,
            'entries': entries,
            **playlist_fields,
            **shared_fields,
        }
|
||||
@@ -135,14 +135,15 @@ class NovaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html#player_13260',
|
||||
'md5': '249baab7d0104e186e78b0899c7d5f28',
|
||||
'md5': 'da8f3f1fcdaf9fb0f112a32a165760a3',
|
||||
'info_dict': {
|
||||
'id': '1757139',
|
||||
'display_id': 'tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci',
|
||||
'id': '8OvQqEvV3MW',
|
||||
'display_id': '8OvQqEvV3MW',
|
||||
'ext': 'mp4',
|
||||
'title': 'Podzemní nemocnice v pražské Krči',
|
||||
'description': 'md5:f0a42dd239c26f61c28f19e62d20ef53',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg)',
|
||||
'duration': 151,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html',
|
||||
@@ -210,7 +211,7 @@ class NovaIE(InfoExtractor):
|
||||
|
||||
# novaplus
|
||||
embed_id = self._search_regex(
|
||||
r'<iframe[^>]+\bsrc=["\'](?:https?:)?//media\.cms\.nova\.cz/embed/([^/?#&]+)',
|
||||
r'<iframe[^>]+\bsrc=["\'](?:https?:)?//media(?:tn)?\.cms\.nova\.cz/embed/([^/?#&"\']+)',
|
||||
webpage, 'embed url', default=None)
|
||||
if embed_id:
|
||||
return {
|
||||
|
||||
@@ -35,6 +35,7 @@ class NTVRuIE(InfoExtractor):
|
||||
'duration': 172,
|
||||
'view_count': int,
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
|
||||
'md5': '82dbd49b38e3af1d00df16acbeab260c',
|
||||
@@ -78,7 +79,8 @@ class NTVRuIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
_VIDEO_ID_REGEXES = [
|
||||
r'<meta property="og:url" content="http://www\.ntv\.ru/video/(\d+)',
|
||||
r'<meta property="og:url" content="https?://www\.ntv\.ru/video/(\d+)',
|
||||
r'<meta property="og:video:(?:url|iframe)" content="https?://www\.ntv\.ru/embed/(\d+)',
|
||||
r'<video embed=[^>]+><id>(\d+)</id>',
|
||||
r'<video restriction[^>]+><key>(\d+)</key>',
|
||||
]
|
||||
|
||||
199
yt_dlp/extractor/nuum.py
Normal file
199
yt_dlp/extractor/nuum.py
Normal file
@@ -0,0 +1,199 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
UserNotLive,
|
||||
filter_dict,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class NuumBaseIE(InfoExtractor):
    """Shared API helpers for the nuum.ru extractors."""

    def _call_api(self, path, video_id, description, query=None):
        """Call the nuum.ru v2 API and return the ``result`` member of the response.

        @param path         API path appended to ``https://nuum.ru/api/v2/``
        @param video_id     ID passed to the downloader for progress/error messages
        @param description  Human-readable name used in the note/errnote messages
        @param query        Optional dict of query parameters (defaults to none)
        @raises ExtractorError  if the response contains a truthy ``error`` member
        """
        # `query` defaults to None instead of a shared mutable `{}`; an empty
        # dict is substituted here so the request is the same as before.
        response = self._download_json(
            f'https://nuum.ru/api/v2/{path}', video_id, query=query or {},
            note=f'Downloading {description} metadata',
            errnote=f'Unable to download {description} metadata')
        if error := response.get('error'):
            raise ExtractorError(f'API returned error: {error!r}')
        return response['result']

    def _get_channel_info(self, channel_name):
        """Return the ``broadcasts/public`` API result for the given channel name."""
        return self._call_api(
            'broadcasts/public', video_id=channel_name, description='channel',
            query={
                'with_extra': 'true',
                'channel_name': channel_name,
                'with_deleted': 'true',
            })

    def _parse_video_data(self, container, extract_formats=True):
        """Build an info dict from a media container object.

        @param container        Media container dict; must contain ``media_container_id``
        @param extract_formats  When true, download the m3u8 manifest to fill in
                                ``formats`` and ``subtitles``; otherwise both are
                                left as None
        @returns                The info dict, passed through ``filter_dict``
                                (expected to drop the None-valued keys)
        """
        # Only the first stream of the container and its first media item are used
        stream = traverse_obj(container, ('media_container_streams', 0, {dict})) or {}
        media = traverse_obj(stream, ('stream_media', 0, {dict})) or {}
        # The archive URL is tried before the plain media URL
        media_url = traverse_obj(media, (
            'media_meta', ('media_archive_url', 'media_url'), {url_or_none}), get_all=False)

        video_id = str(container['media_container_id'])
        is_live = media.get('media_status') == 'RUNNING'

        formats, subtitles = None, None
        if extract_formats:
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(
                media_url, video_id, 'mp4', live=is_live)

        return filter_dict({
            'id': video_id,
            'is_live': is_live,
            'formats': formats,
            'subtitles': subtitles,
            **traverse_obj(container, {
                'title': ('media_container_name', {str}),
                'description': ('media_container_description', {str}),
                'timestamp': ('created_at', {parse_iso8601}),
                'channel': ('media_container_channel', 'channel_name', {str}),
                'channel_id': ('media_container_channel', 'channel_id', {str_or_none}),
            }),
            **traverse_obj(stream, {
                'view_count': ('stream_total_viewers', {int_or_none}),
                'concurrent_view_count': ('stream_current_viewers', {int_or_none}),
            }),
            **traverse_obj(media, {
                'duration': ('media_duration', {int_or_none}),
                'thumbnail': ('media_meta', ('media_preview_archive_url', 'media_preview_url'), {url_or_none}),
            }, get_all=False),
        })
|
||||
|
||||
|
||||
class NuumMediaIE(NuumBaseIE):
    """Extractor for single nuum.ru stream, video and clip pages."""

    IE_NAME = 'nuum:media'
    _VALID_URL = r'https?://nuum\.ru/(?:streams|videos|clips)/(?P<id>[\d]+)'
    _TESTS = [{
        'url': 'https://nuum.ru/streams/1592713-7-days-to-die',
        'only_matching': True,
    }, {
        'url': 'https://nuum.ru/videos/1567547-toxi-hurtz',
        'md5': 'f1d9118a30403e32b702a204eb03aca3',
        'info_dict': {
            'id': '1567547',
            'ext': 'mp4',
            'title': 'Toxi$ - Hurtz',
            'description': '',
            'timestamp': 1702631651,
            'upload_date': '20231215',
            'thumbnail': r're:^https?://.+\.jpg',
            'view_count': int,
            'concurrent_view_count': int,
            'channel_id': '6911',
            'channel': 'toxis',
            'duration': 116,
        },
    }, {
        'url': 'https://nuum.ru/clips/1552564-pro-misu',
        'md5': 'b248ae1565b1e55433188f11beeb0ca1',
        'info_dict': {
            'id': '1552564',
            'ext': 'mp4',
            'title': 'Про Мису 🙃',
            'timestamp': 1701971828,
            'upload_date': '20231207',
            'thumbnail': r're:^https?://.+\.jpg',
            'view_count': int,
            'concurrent_view_count': int,
            'channel_id': '3320',
            'channel': 'Misalelik',
            'duration': 41,
        },
    }]

    def _real_extract(self, url):
        """Look up the media container named by the numeric URL id and parse it."""
        media_id = self._match_id(url)
        return self._parse_video_data(
            self._call_api(f'media-containers/{media_id}', media_id, 'media'))
|
||||
|
||||
|
||||
class NuumLiveIE(NuumBaseIE):
    """Extractor for the current broadcast of a nuum.ru channel."""

    IE_NAME = 'nuum:live'
    _VALID_URL = r'https?://nuum\.ru/channel/(?P<id>[^/#?]+)/?(?:$|[#?])'
    _TESTS = [{
        'url': 'https://nuum.ru/channel/mts_live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the channel's media container as a result attributed to NuumMediaIE.

        Raises UserNotLive only when the API explicitly reports
        ``channel_is_live`` as False (a missing value does not trigger it).
        """
        channel_name = self._match_id(url)
        channel_info = self._get_channel_info(channel_name)

        live_flag = traverse_obj(channel_info, ('channel', 'channel_is_live'))
        if live_flag is False:
            raise UserNotLive(video_id=channel_name)

        parsed = self._parse_video_data(channel_info['media_container'])
        # Defaults pointing at the stream page; any key present in the parsed
        # data is applied afterwards and therefore takes precedence.
        result = {
            'webpage_url': f'https://nuum.ru/streams/{parsed["id"]}',
            'extractor_key': NuumMediaIE.ie_key(),
            'extractor': NuumMediaIE.IE_NAME,
        }
        result.update(parsed)
        return result
|
||||
|
||||
|
||||
class NuumTabIE(NuumBaseIE):
    """Extractor for the streams/videos/clips tabs of a nuum.ru channel."""

    IE_NAME = 'nuum:tab'
    _VALID_URL = r'https?://nuum\.ru/channel/(?P<id>[^/#?]+)/(?P<type>streams|videos|clips)'
    _TESTS = [{
        'url': 'https://nuum.ru/channel/dankon_/clips',
        'info_dict': {
            'id': 'dankon__clips',
            'title': 'Dankon_',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'https://nuum.ru/channel/dankon_/videos',
        'info_dict': {
            'id': 'dankon__videos',
            'title': 'Dankon_',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://nuum.ru/channel/dankon_/streams',
        'info_dict': {
            'id': 'dankon__streams',
            'title': 'Dankon_',
        },
        'playlist_mincount': 1,
    }]

    _PAGE_SIZE = 50

    def _fetch_page(self, channel_id, tab_type, tab_id, page):
        """Yield url results for one page (zero-based ``page``) of a channel tab."""
        # Container types requested from the API for each tab name
        container_types_by_tab = {
            'clips': ['SHORT_VIDEO', 'REVIEW_VIDEO'],
            'videos': ['LONG_VIDEO'],
            'streams': ['SINGLE'],
        }
        api_query = {
            'limit': self._PAGE_SIZE,
            'offset': page * self._PAGE_SIZE,
            'channel_id': channel_id,
            'media_container_status': 'STOPPED',
            'media_container_type': container_types_by_tab[tab_type],
        }

        page_items = self._call_api(
            'media-containers', video_id=tab_id,
            description=f'{tab_type} tab page {page + 1}', query=api_query)
        for item in traverse_obj(page_items, (..., {dict})):
            # Formats are not extracted here; the entry is handed to NuumMediaIE
            info = self._parse_video_data(item, extract_formats=False)
            yield self.url_result(f'https://nuum.ru/videos/{info["id"]}', NuumMediaIE, **info)

    def _real_extract(self, url):
        """Return a paged playlist of all media in the requested channel tab."""
        channel_name, tab_type = self._match_valid_url(url).group('id', 'type')
        tab_id = f'{channel_name}_{tab_type}'
        channel = self._get_channel_info(channel_name)['channel']

        page_fetcher = functools.partial(
            self._fetch_page, channel['channel_id'], tab_type, tab_id)
        paged_entries = OnDemandPagedList(page_fetcher, self._PAGE_SIZE)
        return self.playlist_result(
            paged_entries, playlist_id=tab_id, playlist_title=channel.get('channel_name'))
|
||||
@@ -1,50 +1,93 @@
|
||||
import hmac
|
||||
import hashlib
|
||||
import base64
|
||||
import json
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
get_elements_html_by_class,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
merge_dicts,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
remove_end,
|
||||
remove_start,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class NYTimesBaseIE(InfoExtractor):
|
||||
_SECRET = b'pX(2MbU2);4N{7J8)>YwKRJ+/pQ3JkiU2Q^V>mFYv6g6gYvt6v'
|
||||
_DNS_NAMESPACE = uuid.UUID('36dd619a-56dc-595b-9e09-37f4152c7b5d')
|
||||
_TOKEN = 'MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAuNIzKBOFB77aT/jN/FQ+/QVKWq5V1ka1AYmCR9hstz1pGNPH5ajOU9gAqta0T89iPnhjwla+3oec/Z3kGjxbpv6miQXufHFq3u2RC6HyU458cLat5kVPSOQCe3VVB5NRpOlRuwKHqn0txfxnwSSj8mqzstR997d3gKB//RO9zE16y3PoWlDQXkASngNJEWvL19iob/xwAkfEWCjyRILWFY0JYX3AvLMSbq7wsqOCE5srJpo7rRU32zsByhsp1D5W9OYqqwDmflsgCEQy2vqTsJjrJohuNg+urMXNNZ7Y3naMoqttsGDrWVxtPBafKMI8pM2ReNZBbGQsQXRzQNo7+QIDAQAB'
|
||||
_GRAPHQL_API = 'https://samizdat-graphql.nytimes.com/graphql/v2'
|
||||
_GRAPHQL_QUERY = '''query VideoQuery($id: String!) {
|
||||
video(id: $id) {
|
||||
... on Video {
|
||||
bylines {
|
||||
renderedRepresentation
|
||||
}
|
||||
duration
|
||||
firstPublished
|
||||
promotionalHeadline
|
||||
promotionalMedia {
|
||||
... on Image {
|
||||
crops {
|
||||
name
|
||||
renditions {
|
||||
name
|
||||
width
|
||||
height
|
||||
url
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
renditions {
|
||||
type
|
||||
width
|
||||
height
|
||||
url
|
||||
bitrate
|
||||
}
|
||||
summary
|
||||
}
|
||||
}
|
||||
}'''
|
||||
|
||||
def _extract_video_from_id(self, video_id):
|
||||
# Authorization generation algorithm is reverse engineered from `signer` in
|
||||
# http://graphics8.nytimes.com/video/vhs/vhs-2.x.min.js
|
||||
path = '/svc/video/api/v3/video/' + video_id
|
||||
hm = hmac.new(self._SECRET, (path + ':vhs').encode(), hashlib.sha512).hexdigest()
|
||||
video_data = self._download_json('http://www.nytimes.com' + path, video_id, 'Downloading video JSON', headers={
|
||||
'Authorization': 'NYTV ' + base64.b64encode(hm.encode()).decode(),
|
||||
'X-NYTV': 'vhs',
|
||||
}, fatal=False)
|
||||
if not video_data:
|
||||
video_data = self._download_json(
|
||||
'http://www.nytimes.com/svc/video/api/v2/video/' + video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
def _call_api(self, media_id):
|
||||
# reference: `id-to-uri.js`
|
||||
video_uuid = uuid.uuid5(self._DNS_NAMESPACE, 'video')
|
||||
media_uuid = uuid.uuid5(video_uuid, media_id)
|
||||
|
||||
title = video_data['headline']
|
||||
return traverse_obj(self._download_json(
|
||||
self._GRAPHQL_API, media_id, 'Downloading JSON from GraphQL API', data=json.dumps({
|
||||
'query': self._GRAPHQL_QUERY,
|
||||
'variables': {'id': f'nyt://video/{media_uuid}'},
|
||||
}, separators=(',', ':')).encode(), headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Nyt-App-Type': 'vhs',
|
||||
'Nyt-App-Version': 'v3.52.21',
|
||||
'Nyt-Token': self._TOKEN,
|
||||
'Origin': 'https://nytimes.com',
|
||||
}, fatal=False), ('data', 'video', {dict})) or {}
|
||||
|
||||
def get_file_size(file_size):
|
||||
if isinstance(file_size, int):
|
||||
return file_size
|
||||
elif isinstance(file_size, dict):
|
||||
return int(file_size.get('value', 0))
|
||||
else:
|
||||
return None
|
||||
def _extract_thumbnails(self, thumbs):
|
||||
return traverse_obj(thumbs, (lambda _, v: url_or_none(v['url']), {
|
||||
'url': 'url',
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
}), default=None)
|
||||
|
||||
def _extract_formats_and_subtitles(self, video_id, content_media_json):
|
||||
urls = []
|
||||
formats = []
|
||||
subtitles = {}
|
||||
for video in video_data.get('renditions', []):
|
||||
for video in traverse_obj(content_media_json, ('renditions', ..., {dict})):
|
||||
video_url = video.get('url')
|
||||
format_id = video.get('type')
|
||||
if not video_url or format_id == 'thumbs' or video_url in urls:
|
||||
@@ -56,11 +99,9 @@ class NYTimesBaseIE(InfoExtractor):
|
||||
video_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id=format_id or 'hls', fatal=False)
|
||||
formats.extend(m3u8_fmts)
|
||||
subtitles = self._merge_subtitles(subtitles, m3u8_subs)
|
||||
self._merge_subtitles(m3u8_subs, target=subtitles)
|
||||
elif ext == 'mpd':
|
||||
continue
|
||||
# formats.extend(self._extract_mpd_formats(
|
||||
# video_url, video_id, format_id or 'dash', fatal=False))
|
||||
continue # all mpd urls give 404 errors
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
@@ -68,55 +109,50 @@ class NYTimesBaseIE(InfoExtractor):
|
||||
'vcodec': video.get('videoencoding') or video.get('video_codec'),
|
||||
'width': int_or_none(video.get('width')),
|
||||
'height': int_or_none(video.get('height')),
|
||||
'filesize': get_file_size(video.get('file_size') or video.get('fileSize')),
|
||||
'filesize': traverse_obj(video, (
|
||||
('file_size', 'fileSize'), (None, ('value')), {int_or_none}), get_all=False),
|
||||
'tbr': int_or_none(video.get('bitrate'), 1000) or None,
|
||||
'ext': ext,
|
||||
})
|
||||
|
||||
thumbnails = []
|
||||
for image in video_data.get('images', []):
|
||||
image_url = image.get('url')
|
||||
if not image_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': 'http://www.nytimes.com/' + image_url,
|
||||
'width': int_or_none(image.get('width')),
|
||||
'height': int_or_none(image.get('height')),
|
||||
})
|
||||
return formats, subtitles
|
||||
|
||||
publication_date = video_data.get('publication_date')
|
||||
timestamp = parse_iso8601(publication_date[:-8]) if publication_date else None
|
||||
def _extract_video(self, media_id):
|
||||
data = self._call_api(media_id)
|
||||
formats, subtitles = self._extract_formats_and_subtitles(media_id, data)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': video_data.get('summary'),
|
||||
'timestamp': timestamp,
|
||||
'uploader': video_data.get('byline'),
|
||||
'duration': float_or_none(video_data.get('duration'), 1000),
|
||||
'id': media_id,
|
||||
'title': data.get('promotionalHeadline'),
|
||||
'description': data.get('summary'),
|
||||
'timestamp': parse_iso8601(data.get('firstPublished')),
|
||||
'duration': float_or_none(data.get('duration'), scale=1000),
|
||||
'creator': ', '.join(traverse_obj(data, ( # TODO: change to 'creators'
|
||||
'bylines', ..., 'renderedRepresentation', {lambda x: remove_start(x, 'By ')}))),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'thumbnails': thumbnails,
|
||||
'thumbnails': self._extract_thumbnails(
|
||||
traverse_obj(data, ('promotionalMedia', 'crops', ..., 'renditions', ...))),
|
||||
}
|
||||
|
||||
|
||||
class NYTimesIE(NYTimesBaseIE):
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P<id>\d+)'
|
||||
_EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>']
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
|
||||
'md5': 'd665342765db043f7e225cff19df0f2d',
|
||||
'md5': 'a553aa344014e3723d33893d89d4defc',
|
||||
'info_dict': {
|
||||
'id': '100000002847155',
|
||||
'ext': 'mov',
|
||||
'ext': 'mp4',
|
||||
'title': 'Verbatim: What Is a Photocopier?',
|
||||
'description': 'md5:93603dada88ddbda9395632fdc5da260',
|
||||
'timestamp': 1398631707,
|
||||
'upload_date': '20140427',
|
||||
'uploader': 'Brett Weiner',
|
||||
'timestamp': 1398646132,
|
||||
'upload_date': '20140428',
|
||||
'creator': 'Brett Weiner',
|
||||
'thumbnail': r're:https?://\w+\.nyt.com/images/.+\.jpg',
|
||||
'duration': 419,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.nytimes.com/video/travel/100000003550828/36-hours-in-dubai.html',
|
||||
'only_matching': True,
|
||||
@@ -125,138 +161,260 @@ class NYTimesIE(NYTimesBaseIE):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
return self._extract_video_from_id(video_id)
|
||||
return self._extract_video(video_id)
|
||||
|
||||
|
||||
class NYTimesArticleIE(NYTimesBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?nytimes\.com/(.(?<!video))*?/(?:[^/]+/)*(?P<id>[^.]+)(?:\.html)?'
|
||||
_VALID_URL = r'https?://(?:www\.)?nytimes\.com/\d{4}/\d{2}/\d{2}/(?!books|podcasts)[^/?#]+/(?:\w+/)?(?P<id>[^./?#]+)(?:\.html)?'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nytimes.com/2015/04/14/business/owner-of-gravity-payments-a-credit-card-processor-is-setting-a-new-minimum-wage-70000-a-year.html?_r=0',
|
||||
'md5': 'e2076d58b4da18e6a001d53fd56db3c9',
|
||||
'md5': '3eb5ddb1d6f86254fe4f233826778737',
|
||||
'info_dict': {
|
||||
'id': '100000003628438',
|
||||
'ext': 'mov',
|
||||
'title': 'New Minimum Wage: $70,000 a Year',
|
||||
'description': 'Dan Price, C.E.O. of Gravity Payments, surprised his 120-person staff by announcing that he planned over the next three years to raise the salary of every employee to $70,000 a year.',
|
||||
'timestamp': 1429033037,
|
||||
'ext': 'mp4',
|
||||
'title': 'One Company’s New Minimum Wage: $70,000 a Year',
|
||||
'description': 'md5:89ba9ab67ca767bb92bf823d1f138433',
|
||||
'timestamp': 1429047468,
|
||||
'upload_date': '20150414',
|
||||
'uploader': 'Matthew Williams',
|
||||
}
|
||||
'creator': 'Patricia Cohen',
|
||||
'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
|
||||
'duration': 119.0,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.nytimes.com/2016/10/14/podcasts/revelations-from-the-final-weeks.html',
|
||||
'md5': 'e0d52040cafb07662acf3c9132db3575',
|
||||
# article with audio and no video
|
||||
'url': 'https://www.nytimes.com/2023/09/29/health/mosquitoes-genetic-engineering.html',
|
||||
'md5': '2365b3555c8aa7f4dd34ca735ad02e6a',
|
||||
'info_dict': {
|
||||
'id': '100000004709062',
|
||||
'title': 'The Run-Up: ‘He Was Like an Octopus’',
|
||||
'id': '100000009110381',
|
||||
'ext': 'mp3',
|
||||
'description': 'md5:fb5c6b93b12efc51649b4847fe066ee4',
|
||||
'series': 'The Run-Up',
|
||||
'episode': '‘He Was Like an Octopus’',
|
||||
'episode_number': 20,
|
||||
'duration': 2130,
|
||||
}
|
||||
'title': 'The Gamble: Can Genetically Modified Mosquitoes End Disease?',
|
||||
'description': 'md5:9ff8b47acbaf7f3ca8c732f5c815be2e',
|
||||
'timestamp': 1695960700,
|
||||
'upload_date': '20230929',
|
||||
'creator': 'Stephanie Nolen, Natalija Gormalova',
|
||||
'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
|
||||
'duration': 1322,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.nytimes.com/2016/10/16/books/review/inside-the-new-york-times-book-review-the-rise-of-hitler.html',
|
||||
'url': 'https://www.nytimes.com/2023/11/29/business/dealbook/kamala-harris-biden-voters.html',
|
||||
'md5': '3eb5ddb1d6f86254fe4f233826778737',
|
||||
'info_dict': {
|
||||
'id': '100000004709479',
|
||||
'title': 'The Rise of Hitler',
|
||||
'ext': 'mp3',
|
||||
'description': 'md5:bce877fd9e3444990cb141875fab0028',
|
||||
'creator': 'Pamela Paul',
|
||||
'duration': 3475,
|
||||
'id': '100000009202270',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kamala Harris Defends Biden Policies, but Says ‘More Work’ Needed to Reach Voters',
|
||||
'description': 'md5:de4212a7e19bb89e4fb14210ca915f1f',
|
||||
'timestamp': 1701290997,
|
||||
'upload_date': '20231129',
|
||||
'uploader': 'By The New York Times',
|
||||
'creator': 'Katie Rogers',
|
||||
'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
|
||||
'duration': 97.631,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.nytimes.com/news/minute/2014/03/17/times-minute-whats-next-in-crimea/?_php=true&_type=blogs&_php=true&_type=blogs&_r=1',
|
||||
# multiple videos in the same article
|
||||
'url': 'https://www.nytimes.com/2023/12/02/business/air-traffic-controllers-safety.html',
|
||||
'info_dict': {
|
||||
'id': 'air-traffic-controllers-safety',
|
||||
'title': 'Drunk and Asleep on the Job: Air Traffic Controllers Pushed to the Brink',
|
||||
'description': 'md5:549e5a5e935bf7d048be53ba3d2c863d',
|
||||
'upload_date': '20231202',
|
||||
'creator': 'Emily Steel, Sydney Ember',
|
||||
'timestamp': 1701511264,
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
'url': 'https://www.nytimes.com/2023/12/02/business/media/netflix-squid-game-challenge.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_podcast_from_json(self, json, page_id, webpage):
|
||||
podcast_audio = self._parse_json(
|
||||
json, page_id, transform_source=js_to_json)
|
||||
def _extract_content_from_block(self, block):
|
||||
details = traverse_obj(block, {
|
||||
'id': ('sourceId', {str}),
|
||||
'uploader': ('bylines', ..., 'renderedRepresentation', {str}),
|
||||
'duration': (None, (('duration', {lambda x: float_or_none(x, scale=1000)}), ('length', {int_or_none}))),
|
||||
'timestamp': ('firstPublished', {parse_iso8601}),
|
||||
'series': ('podcastSeries', {str}),
|
||||
}, get_all=False)
|
||||
|
||||
audio_data = podcast_audio['data']
|
||||
track = audio_data['track']
|
||||
|
||||
episode_title = track['title']
|
||||
video_url = track['source']
|
||||
|
||||
description = track.get('description') or self._html_search_meta(
|
||||
['og:description', 'twitter:description'], webpage)
|
||||
|
||||
podcast_title = audio_data.get('podcast', {}).get('title')
|
||||
title = ('%s: %s' % (podcast_title, episode_title)
|
||||
if podcast_title else episode_title)
|
||||
|
||||
episode = audio_data.get('podcast', {}).get('episode') or ''
|
||||
episode_number = int_or_none(self._search_regex(
|
||||
r'[Ee]pisode\s+(\d+)', episode, 'episode number', default=None))
|
||||
formats, subtitles = self._extract_formats_and_subtitles(details.get('id'), block)
|
||||
# audio articles will have an url and no formats
|
||||
url = traverse_obj(block, ('fileUrl', {url_or_none}))
|
||||
if not formats and url:
|
||||
formats.append({'url': url, 'vcodec': 'none'})
|
||||
|
||||
return {
|
||||
'id': remove_start(podcast_audio.get('target'), 'FT') or page_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'creator': track.get('credit'),
|
||||
'series': podcast_title,
|
||||
'episode': episode_title,
|
||||
'episode_number': episode_number,
|
||||
'duration': int_or_none(track.get('duration')),
|
||||
**details,
|
||||
'thumbnails': self._extract_thumbnails(traverse_obj(
|
||||
block, ('promotionalMedia', 'crops', ..., 'renditions', ...))),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
art_json = self._search_json(
|
||||
r'window\.__preloadedData\s*=', webpage, 'media details', page_id,
|
||||
transform_source=lambda x: x.replace('undefined', 'null'))['initialData']['data']['article']
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'data-videoid=["\'](\d+)', webpage, 'video id',
|
||||
default=None, fatal=False)
|
||||
if video_id is not None:
|
||||
return self._extract_video_from_id(video_id)
|
||||
blocks = traverse_obj(art_json, (
|
||||
'sprinkledBody', 'content', ..., ('ledeMedia', None),
|
||||
lambda _, v: v['__typename'] in ('Video', 'Audio')))
|
||||
if not blocks:
|
||||
raise ExtractorError('Unable to extract any media blocks from webpage')
|
||||
|
||||
podcast_data = self._search_regex(
|
||||
(r'NYTD\.FlexTypes\.push\s*\(\s*({.+?})\s*\)\s*;\s*</script',
|
||||
r'NYTD\.FlexTypes\.push\s*\(\s*({.+})\s*\)\s*;'),
|
||||
webpage, 'podcast data')
|
||||
return self._extract_podcast_from_json(podcast_data, page_id, webpage)
|
||||
common_info = {
|
||||
'title': remove_end(self._html_extract_title(webpage), ' - The New York Times'),
|
||||
'description': traverse_obj(art_json, (
|
||||
'sprinkledBody', 'content', ..., 'summary', 'content', ..., 'text', {str}),
|
||||
get_all=False) or self._html_search_meta(['og:description', 'twitter:description'], webpage),
|
||||
'timestamp': traverse_obj(art_json, ('firstPublished', {parse_iso8601})),
|
||||
'creator': ', '.join(
|
||||
traverse_obj(art_json, ('bylines', ..., 'creators', ..., 'displayName'))), # TODO: change to 'creators' (list)
|
||||
'thumbnails': self._extract_thumbnails(traverse_obj(
|
||||
art_json, ('promotionalMedia', 'assetCrops', ..., 'renditions', ...))),
|
||||
}
|
||||
|
||||
entries = []
|
||||
for block in blocks:
|
||||
entries.append(merge_dicts(self._extract_content_from_block(block), common_info))
|
||||
|
||||
if len(entries) > 1:
|
||||
return self.playlist_result(entries, page_id, **common_info)
|
||||
|
||||
return {
|
||||
'id': page_id,
|
||||
**entries[0],
|
||||
}
|
||||
|
||||
|
||||
class NYTimesCookingIE(NYTimesBaseIE):
|
||||
_VALID_URL = r'https?://cooking\.nytimes\.com/(?:guid|recip)es/(?P<id>\d+)'
|
||||
IE_NAME = 'NYTimesCookingGuide'
|
||||
_VALID_URL = r'https?://cooking\.nytimes\.com/guides/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://cooking.nytimes.com/recipes/1017817-cranberry-curd-tart',
|
||||
'md5': 'dab81fa2eaeb3f9ed47498bdcfcdc1d3',
|
||||
'url': 'https://cooking.nytimes.com/guides/13-how-to-cook-a-turkey',
|
||||
'info_dict': {
|
||||
'id': '100000004756089',
|
||||
'ext': 'mov',
|
||||
'timestamp': 1479383008,
|
||||
'uploader': 'By SHAW LASH, ADAM SAEWITZ and JAMES HERRON',
|
||||
'title': 'Cranberry Tart',
|
||||
'upload_date': '20161117',
|
||||
'description': 'If you are a fan of lemon curd or the classic French tarte au citron, you will love this cranberry version.',
|
||||
'id': '13-how-to-cook-a-turkey',
|
||||
'title': 'How to Cook a Turkey',
|
||||
'description': 'md5:726cfd3f9b161bdf5c279879e8050ca0',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
# single video example
|
||||
'url': 'https://cooking.nytimes.com/guides/50-how-to-make-mac-and-cheese',
|
||||
'md5': '64415805fe0b8640fce6b0b9def5989a',
|
||||
'info_dict': {
|
||||
'id': '100000005835845',
|
||||
'ext': 'mp4',
|
||||
'title': 'How to Make Mac and Cheese',
|
||||
'description': 'md5:b8f2f33ec1fb7523b21367147c9594f1',
|
||||
'timestamp': 1522950315,
|
||||
'upload_date': '20180405',
|
||||
'duration': 9.51,
|
||||
'creator': 'Alison Roman',
|
||||
'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://cooking.nytimes.com/guides/13-how-to-cook-a-turkey',
|
||||
'md5': '4b2e8c70530a89b8d905a2b572316eb8',
|
||||
'url': 'https://cooking.nytimes.com/guides/20-how-to-frost-a-cake',
|
||||
'md5': '64415805fe0b8640fce6b0b9def5989a',
|
||||
'info_dict': {
|
||||
'id': '100000003951728',
|
||||
'ext': 'mov',
|
||||
'timestamp': 1445509539,
|
||||
'description': 'Turkey guide',
|
||||
'upload_date': '20151022',
|
||||
'title': 'Turkey',
|
||||
}
|
||||
'id': '20-how-to-frost-a-cake',
|
||||
'title': 'How to Frost a Cake',
|
||||
'description': 'md5:a31fe3b98a8ce7b98aae097730c269cd',
|
||||
},
|
||||
'playlist_count': 8,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
title = self._html_search_meta(['og:title', 'twitter:title'], webpage)
|
||||
description = self._html_search_meta(['og:description', 'twitter:description'], webpage)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'data-video-id=["\'](\d+)', webpage, 'video id')
|
||||
lead_video_id = self._search_regex(
|
||||
r'data-video-player-id="(\d+)"></div>', webpage, 'lead video')
|
||||
media_ids = traverse_obj(
|
||||
get_elements_html_by_class('video-item', webpage), (..., {extract_attributes}, 'data-video-id'))
|
||||
|
||||
return self._extract_video_from_id(video_id)
|
||||
if media_ids:
|
||||
media_ids.append(lead_video_id)
|
||||
return self.playlist_result(
|
||||
[self._extract_video(media_id) for media_id in media_ids], page_id, title, description)
|
||||
|
||||
return {
|
||||
**self._extract_video(lead_video_id),
|
||||
'title': title,
|
||||
'description': description,
|
||||
'creator': self._search_regex( # TODO: change to 'creators'
|
||||
r'<span itemprop="author">([^<]+)</span></p>', webpage, 'author', default=None),
|
||||
}
|
||||
|
||||
|
||||
class NYTimesCookingRecipeIE(InfoExtractor):
    # Handles single recipe pages (cooking.nytimes.com/recipes/<numeric id>...).
    _VALID_URL = r'https?://cooking\.nytimes\.com/recipes/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://cooking.nytimes.com/recipes/1017817-cranberry-curd-tart',
            'md5': '579e83bbe8e61e9de67f80edba8a78a8',
            'info_dict': {
                'id': '1017817',
                'ext': 'mp4',
                'title': 'Cranberry Curd Tart',
                'description': 'md5:ad77a3fc321db636256d4343c5742152',
                'timestamp': 1447804800,
                'upload_date': '20151118',
                'creator': 'David Tanis',
                'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
            },
        },
        {
            'url': 'https://cooking.nytimes.com/recipes/1024781-neapolitan-checkerboard-cookies',
            'md5': '58df35998241dcf0620e99e646331b42',
            'info_dict': {
                'id': '1024781',
                'ext': 'mp4',
                'title': 'Neapolitan Checkerboard Cookies',
                'description': 'md5:ba12394c585ababea951cb6d2fcc6631',
                'timestamp': 1701302400,
                'upload_date': '20231130',
                'creator': 'Sue Li',
                'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
            },
        },
        {
            'url': 'https://cooking.nytimes.com/recipes/1019516-overnight-oats',
            'md5': '2fe7965a3adc899913b8e25ada360823',
            'info_dict': {
                'id': '1019516',
                'ext': 'mp4',
                'timestamp': 1546387200,
                'description': 'md5:8856ce10239161bd2596ac335b9f9bfb',
                'upload_date': '20190102',
                'title': 'Overnight Oats',
                'creator': 'Genevieve Ko',
                'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
            },
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for one recipe page.

        The recipe record is taken from the page data returned by
        `_search_nextjs_data` (props -> pageProps -> recipe); its `videoSrc`
        entry is passed to the m3u8 format extractor.
        """
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)
        recipe = self._search_nextjs_data(webpage, page_id)['props']['pageProps']['recipe']

        # A missing 'videoSrc' raises KeyError here, exactly as a page without
        # the expected recipe structure does on the line above.
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            recipe['videoSrc'], page_id, 'mp4', m3u8_id='hls')

        # Metadata fields mapped straight from the recipe record.
        info = traverse_obj(recipe, {
            'id': ('id', {str_or_none}),
            'title': ('title', {str}),
            'description': ('topnote', {clean_html}),
            'timestamp': ('publishedAt', {int_or_none}),
            'creator': ('contentAttribution', 'cardByline', {str}),
        })

        crop_urls = traverse_obj(recipe, ('image', 'crops', 'recipe', ..., {url_or_none}))
        info.update({
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': [{'url': crop_url} for crop_url in crop_urls],
        })
        return info
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
from .common import InfoExtractor
|
||||
from .jwplatform import JWPlatformIE
|
||||
from ..utils import make_archive_id
|
||||
|
||||
|
||||
class OneFootballIE(InfoExtractor):
|
||||
@@ -7,41 +9,43 @@ class OneFootballIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
'url': 'https://onefootball.com/en/video/highlights-fc-zuerich-3-3-fc-basel-34012334',
|
||||
'info_dict': {
|
||||
'id': '34012334',
|
||||
'id': 'Y2VtcWAT',
|
||||
'ext': 'mp4',
|
||||
'title': 'Highlights: FC Zürich 3-3 FC Basel',
|
||||
'description': 'md5:33d9855cb790702c4fe42a513700aba8',
|
||||
'thumbnail': 'https://photobooth-api.onefootball.com/api/screenshot/https:%2F%2Fperegrine-api.onefootball.com%2Fv2%2Fphotobooth%2Fcms%2Fen%2F34012334',
|
||||
'timestamp': 1635874604,
|
||||
'upload_date': '20211102'
|
||||
'thumbnail': 'https://cdn.jwplayer.com/v2/media/Y2VtcWAT/poster.jpg?width=720',
|
||||
'timestamp': 1635874895,
|
||||
'upload_date': '20211102',
|
||||
'duration': 375.0,
|
||||
'tags': ['Football', 'Soccer', 'OneFootball'],
|
||||
'_old_archive_ids': ['onefootball 34012334'],
|
||||
},
|
||||
'params': {'skip_download': True}
|
||||
'params': {'skip_download': True},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'https://onefootball.com/en/video/klopp-fumes-at-var-decisions-in-west-ham-defeat-34041020',
|
||||
'info_dict': {
|
||||
'id': '34041020',
|
||||
'id': 'leVJrMho',
|
||||
'ext': 'mp4',
|
||||
'title': 'Klopp fumes at VAR decisions in West Ham defeat',
|
||||
'description': 'md5:9c50371095a01ad3f63311c73d8f51a5',
|
||||
'thumbnail': 'https://photobooth-api.onefootball.com/api/screenshot/https:%2F%2Fperegrine-api.onefootball.com%2Fv2%2Fphotobooth%2Fcms%2Fen%2F34041020',
|
||||
'timestamp': 1636314103,
|
||||
'upload_date': '20211107'
|
||||
'thumbnail': 'https://cdn.jwplayer.com/v2/media/leVJrMho/poster.jpg?width=720',
|
||||
'timestamp': 1636315232,
|
||||
'upload_date': '20211107',
|
||||
'duration': 93.0,
|
||||
'tags': ['Football', 'Soccer', 'OneFootball'],
|
||||
'_old_archive_ids': ['onefootball 34041020'],
|
||||
},
|
||||
'params': {'skip_download': True}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, id)
|
||||
data_json = self._search_json_ld(webpage, id)
|
||||
m3u8_url = self._html_search_regex(r'(https://cdn\.jwplayer\.com/manifests/.+\.m3u8)', webpage, 'm3u8_url')
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, id)
|
||||
return {
|
||||
'id': id,
|
||||
'title': data_json.get('title'),
|
||||
'description': data_json.get('description'),
|
||||
'thumbnail': data_json.get('thumbnail'),
|
||||
'timestamp': data_json.get('timestamp'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
data_json = self._search_json_ld(webpage, video_id, fatal=False)
|
||||
data_json.pop('url', None)
|
||||
m3u8_url = self._html_search_regex(r'(https://cdn\.jwplayer\.com/manifests/\w+\.m3u8)', webpage, 'm3u8_url')
|
||||
|
||||
return self.url_result(
|
||||
m3u8_url, JWPlatformIE, video_id, _old_archive_ids=[make_archive_id(self, video_id)],
|
||||
**data_json, url_transparent=True)
|
||||
|
||||
@@ -12,6 +12,8 @@ from ..compat import compat_str
|
||||
|
||||
|
||||
class OpenRecBaseIE(InfoExtractor):
|
||||
_M3U8_HEADERS = {'Referer': 'https://www.openrec.tv/'}
|
||||
|
||||
def _extract_pagestore(self, webpage, video_id):
|
||||
return self._parse_json(
|
||||
self._search_regex(r'(?m)window\.pageStore\s*=\s*(\{.+?\});$', webpage, 'window.pageStore'), video_id)
|
||||
@@ -21,7 +23,7 @@ class OpenRecBaseIE(InfoExtractor):
|
||||
if not m3u8_url:
|
||||
continue
|
||||
yield from self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, ext='mp4', m3u8_id=name)
|
||||
m3u8_url, video_id, ext='mp4', m3u8_id=name, headers=self._M3U8_HEADERS)
|
||||
|
||||
def _extract_movie(self, webpage, video_id, name, is_live):
|
||||
window_stores = self._extract_pagestore(webpage, video_id)
|
||||
@@ -60,6 +62,7 @@ class OpenRecBaseIE(InfoExtractor):
|
||||
'uploader_id': get_first(movie_stores, ('channel', 'user', 'id')),
|
||||
'timestamp': int_or_none(get_first(movie_stores, ['publishedAt', 'time']), scale=1000) or unified_timestamp(get_first(movie_stores, 'publishedAt')),
|
||||
'is_live': is_live,
|
||||
'http_headers': self._M3U8_HEADERS,
|
||||
}
|
||||
|
||||
|
||||
@@ -110,7 +113,7 @@ class OpenRecCaptureIE(OpenRecBaseIE):
|
||||
raise ExtractorError('Cannot extract title')
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
capture_data.get('source'), video_id, ext='mp4')
|
||||
capture_data.get('source'), video_id, ext='mp4', headers=self._M3U8_HEADERS)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -121,6 +124,7 @@ class OpenRecCaptureIE(OpenRecBaseIE):
|
||||
'uploader': traverse_obj(movie_store, ('channel', 'name'), expected_type=compat_str),
|
||||
'uploader_id': traverse_obj(movie_store, ('channel', 'id'), expected_type=compat_str),
|
||||
'upload_date': unified_strdate(capture_data.get('createdAt')),
|
||||
'http_headers': self._M3U8_HEADERS,
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import base64
|
||||
import functools
|
||||
import re
|
||||
|
||||
@@ -565,3 +566,66 @@ class ORFFM4StoryIE(InfoExtractor):
|
||||
})
|
||||
|
||||
return self.playlist_result(entries)
|
||||
|
||||
|
||||
class ORFONIE(InfoExtractor):
    # Handles on.orf.at video pages: /video/<8-digit id>/<slug>.
    IE_NAME = 'orf:on'
    _VALID_URL = r'https?://on\.orf\.at/video/(?P<id>\d{8})/(?P<slug>[\w-]+)'
    _TESTS = [
        {
            'url': 'https://on.orf.at/video/14210000/school-of-champions-48',
            'info_dict': {
                'id': '14210000',
                'ext': 'mp4',
                'duration': 2651.08,
                'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0167/98/thumb_16697671_segments_highlight_teaser.jpeg',
                'title': 'School of Champions (4/8)',
                'description': 'md5:d09ad279fc2e8502611e7648484b6afd',
                'media_type': 'episode',
                'timestamp': 1706472362,
                'upload_date': '20240128',
            }
        },
    ]

    def _extract_video(self, video_id, display_id):
        """Query the episode API and collect formats, subtitles and metadata.

        The API path component is the base64 encoding of a fixed prefix
        followed by the numeric video id.
        """
        token = base64.b64encode(f'3dSlfek03nsLKdj4Jsd{video_id}'.encode()).decode()
        episode = self._download_json(
            f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{token}', display_id)

        formats, subtitles = [], {}
        for kind in traverse_obj(episode, ('sources', {dict.keys}, ...)):
            # Only HLS and DASH manifests are handled; other source kinds are ignored.
            if kind not in ('hls', 'dash'):
                continue
            for src in traverse_obj(episode, ('sources', kind, ..., 'src', {url_or_none})):
                if kind == 'hls':
                    new_formats, new_subs = self._extract_m3u8_formats_and_subtitles(
                        src, display_id, fatal=False, m3u8_id='hls')
                else:
                    new_formats, new_subs = self._extract_mpd_formats_and_subtitles(
                        src, display_id, fatal=False, mpd_id='dash')
                formats.extend(new_formats)
                self._merge_subtitles(new_subs, target=subtitles)

        result = {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
        }
        # For title/description the first usable of the listed keys is taken.
        result.update(traverse_obj(episode, {
            'duration': ('duration_second', {float_or_none}),
            'title': (('title', 'headline'), {str}),
            'description': (('description', 'teaser_text'), {str}),
            'media_type': ('video_type', {str}),
        }, get_all=False))
        return result

    def _real_extract(self, url):
        """Merge page meta tags, JSON-LD data and API data into one info dict.

        Later sources override earlier ones: HTML meta tags, then JSON-LD,
        then the values returned by `_extract_video`.
        """
        video_id, display_id = self._match_valid_url(url).group('id', 'slug')
        webpage = self._download_webpage(url, display_id)

        info = {
            'id': video_id,
            'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None),
            'description': self._html_search_meta(
                ['description', 'og:description', 'twitter:description'], webpage, default=None),
        }
        info.update(self._search_json_ld(webpage, display_id, fatal=False))
        info.update(self._extract_video(video_id, display_id))
        return info
|
||||
|
||||
@@ -275,7 +275,7 @@ class PatreonIE(PatreonBaseIE):
|
||||
'ext': ext,
|
||||
'url': post_file['url'],
|
||||
}
|
||||
elif name == 'video':
|
||||
elif name == 'video' or determine_ext(post_file.get('url')) == 'm3u8':
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(post_file['url'], video_id)
|
||||
return {
|
||||
**info,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,10 +1,18 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none, traverse_obj
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_qs,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class PlaySuisseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'playsuisse'
|
||||
_VALID_URL = r'https?://(?:www\.)?playsuisse\.ch/(?:watch|detail)/(?:[^#]*[?&]episodeId=)?(?P<id>[0-9]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -134,12 +142,47 @@ class PlaySuisseIE(InfoExtractor):
|
||||
id
|
||||
url
|
||||
}'''
|
||||
_LOGIN_BASE_URL = 'https://login.srgssr.ch/srgssrlogin.onmicrosoft.com'
|
||||
_LOGIN_PATH = 'B2C_1A__SignInV2'
|
||||
_ID_TOKEN = None
|
||||
|
||||
def _perform_login(self, username, password):
    """Log in with username/password and store the resulting ID token.

    Steps: fetch the SSO login page and read its SETTINGS object, post the
    credentials, then request the confirmation endpoint and read `id_token`
    from the query string of the response URL.

    Raises ExtractorError when the credential post reports status 400 or
    when no `id_token` is present in the final URL.
    """
    sso_page = self._download_webpage(
        'https://www.playsuisse.ch/api/sso/login', None, note='Downloading login page',
        query={'x': 'x', 'locale': 'de', 'redirectUrl': 'https://www.playsuisse.ch/'})
    settings = self._search_json(r'var\s+SETTINGS\s*=', sso_page, 'settings', None)

    csrf_token = settings['csrf']
    query = {'tx': settings['transId'], 'p': self._LOGIN_PATH}

    credentials = urlencode_postdata({
        'request_type': 'RESPONSE',
        'signInName': username,
        'password': password
    })
    # HTTP 400 is tolerated so that the JSON body can be inspected below.
    login_response = self._download_json(
        f'{self._LOGIN_BASE_URL}/{self._LOGIN_PATH}/SelfAsserted', None, 'Logging in',
        query=query, headers={'X-CSRF-TOKEN': csrf_token}, data=credentials,
        expected_status=400)
    if traverse_obj(login_response, ('status', {int_or_none})) == 400:
        raise ExtractorError('Invalid username or password', expected=True)

    confirm_query = {
        'rememberMe': 'false',
        'csrf_token': csrf_token,
        **query,
        'diags': '',
    }
    urlh = self._request_webpage(
        f'{self._LOGIN_BASE_URL}/{self._LOGIN_PATH}/api/CombinedSigninAndSignup/confirmed',
        None, 'Downloading ID token', query=confirm_query)

    id_token = traverse_obj(parse_qs(urlh.url), ('id_token', 0))
    self._ID_TOKEN = id_token
    if not id_token:
        raise ExtractorError('Login failed')
|
||||
|
||||
def _get_media_data(self, media_id):
|
||||
# NOTE: In the web app, the "locale" header is used to switch between languages;
|
||||
# however, this doesn't seem to take effect when passing the header here.
|
||||
response = self._download_json(
|
||||
'https://4bbepzm4ef.execute-api.eu-central-1.amazonaws.com/prod/graphql',
|
||||
'https://www.playsuisse.ch/api/graphql',
|
||||
media_id, data=json.dumps({
|
||||
'operationName': 'AssetWatch',
|
||||
'query': self._GRAPHQL_QUERY,
|
||||
@@ -150,6 +193,9 @@ class PlaySuisseIE(InfoExtractor):
|
||||
return response['data']['assetV2']
|
||||
|
||||
def _real_extract(self, url):
|
||||
if not self._ID_TOKEN:
|
||||
self.raise_login_required(method='password')
|
||||
|
||||
media_id = self._match_id(url)
|
||||
media_data = self._get_media_data(media_id)
|
||||
info = self._extract_single(media_data)
|
||||
@@ -168,7 +214,8 @@ class PlaySuisseIE(InfoExtractor):
|
||||
if not media.get('url') or media.get('type') != 'HLS':
|
||||
continue
|
||||
f, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
media['url'], media_data['id'], 'mp4', m3u8_id='HLS', fatal=False)
|
||||
update_url_query(media['url'], {'id_token': self._ID_TOKEN}),
|
||||
media_data['id'], 'mp4', m3u8_id='HLS', fatal=False)
|
||||
formats.extend(f)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
|
||||
@@ -87,8 +87,8 @@ class PornHubBaseIE(InfoExtractor):
|
||||
|
||||
def is_logged(webpage):
|
||||
return any(re.search(p, webpage) for p in (
|
||||
r'class=["\']signOut',
|
||||
r'>Sign\s+[Oo]ut\s*<'))
|
||||
r'id="profileMenuDropdown"',
|
||||
r'class="ph-icon-logout"'))
|
||||
|
||||
if is_logged(login_page):
|
||||
self._logged_in = True
|
||||
|
||||
@@ -18,7 +18,6 @@ from ..utils.traversal import traverse_obj
|
||||
class Pr0grammIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://pr0gramm\.com\/(?:[^/?#]+/)+(?P<id>[\d]+)(?:[/?#:]|$)'
|
||||
_TESTS = [{
|
||||
# Tags require account
|
||||
'url': 'https://pr0gramm.com/new/video/5466437',
|
||||
'info_dict': {
|
||||
'id': '5466437',
|
||||
@@ -36,7 +35,6 @@ class Pr0grammIE(InfoExtractor):
|
||||
'_old_archive_ids': ['pr0grammstatic 5466437'],
|
||||
},
|
||||
}, {
|
||||
# Tags require account
|
||||
'url': 'https://pr0gramm.com/new/3052805:comment28391322',
|
||||
'info_dict': {
|
||||
'id': '3052805',
|
||||
@@ -71,6 +69,23 @@ class Pr0grammIE(InfoExtractor):
|
||||
'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
|
||||
'_old_archive_ids': ['pr0grammstatic 5848332'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://pr0gramm.com/top/5895149',
|
||||
'info_dict': {
|
||||
'id': '5895149',
|
||||
'ext': 'mp4',
|
||||
'title': 'pr0gramm-5895149 by algoholigSeeManThrower',
|
||||
'tags': 'count:19',
|
||||
'uploader': 'algoholigSeeManThrower',
|
||||
'uploader_id': 457556,
|
||||
'upload_timestamp': 1697580902,
|
||||
'upload_date': '20231018',
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 0,
|
||||
'thumbnail': 'https://thumb.pr0gramm.com/2023/10/18/db47bb3db5e1a1b3.jpg',
|
||||
'_old_archive_ids': ['pr0grammstatic 5895149'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://pr0gramm.com/static/5466437',
|
||||
'only_matching': True,
|
||||
@@ -92,15 +107,15 @@ class Pr0grammIE(InfoExtractor):
|
||||
def _maximum_flags(self):
|
||||
# We need to guess the flags for the content otherwise the api will raise an error
|
||||
# We can guess the maximum allowed flags for the account from the cookies
|
||||
# Bitflags are (msbf): nsfp, nsfl, nsfw, sfw
|
||||
flags = 0b0001
|
||||
# Bitflags are (msbf): pol, nsfp, nsfl, nsfw, sfw
|
||||
flags = 0b10001
|
||||
if self._is_logged_in:
|
||||
flags |= 0b1000
|
||||
flags |= 0b01000
|
||||
cookies = self._get_cookies(self.BASE_URL)
|
||||
if 'me' not in cookies:
|
||||
self._download_webpage(self.BASE_URL, None, 'Refreshing verification information')
|
||||
if traverse_obj(cookies, ('me', {lambda x: x.value}, {unquote}, {json.loads}, 'verified')):
|
||||
flags |= 0b0110
|
||||
flags |= 0b00110
|
||||
|
||||
return flags
|
||||
|
||||
@@ -134,14 +149,12 @@ class Pr0grammIE(InfoExtractor):
|
||||
if not source or not source.endswith('mp4'):
|
||||
self.raise_no_formats('Could not extract a video', expected=bool(source), video_id=video_id)
|
||||
|
||||
tags = None
|
||||
if self._is_logged_in:
|
||||
metadata = self._call_api('info', video_id, {'itemId': video_id}, note='Downloading tags')
|
||||
tags = traverse_obj(metadata, ('tags', ..., 'tag', {str}))
|
||||
# Sorted by "confidence", higher confidence = earlier in list
|
||||
confidences = traverse_obj(metadata, ('tags', ..., 'confidence', ({int}, {float})))
|
||||
if confidences:
|
||||
tags = [tag for _, tag in sorted(zip(confidences, tags), reverse=True)]
|
||||
metadata = self._call_api('info', video_id, {'itemId': video_id}, note='Downloading tags')
|
||||
tags = traverse_obj(metadata, ('tags', ..., 'tag', {str}))
|
||||
# Sorted by "confidence", higher confidence = earlier in list
|
||||
confidences = traverse_obj(metadata, ('tags', ..., 'confidence', ({int}, {float})))
|
||||
if confidences:
|
||||
tags = [tag for _, tag in sorted(zip(confidences, tags), reverse=True)]
|
||||
|
||||
formats = traverse_obj(video_info, ('variants', ..., {
|
||||
'format_id': ('name', {str}),
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601, traverse_obj, try_call
|
||||
from ..utils import float_or_none, parse_iso8601, str_or_none, try_call
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class PrankCastIE(InfoExtractor):
|
||||
@@ -64,3 +67,71 @@ class PrankCastIE(InfoExtractor):
|
||||
'categories': [json_info.get('broadcast_category')],
|
||||
'tags': try_call(lambda: json_info['broadcast_tags'].split(','))
|
||||
}
|
||||
|
||||
|
||||
class PrankCastPostIE(InfoExtractor):
    # Handles PrankCast user posts: /<user>/posts/<numeric id>-<slug>.
    _VALID_URL = r'https?://(?:www\.)?prankcast\.com/[^/?#]+/posts/(?P<id>\d+)-(?P<display_id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://prankcast.com/devonanustart/posts/6214-happy-national-rachel-day-',
        'info_dict': {
            'id': '6214',
            'ext': 'mp3',
            'title': 'Happy National Rachel Day!',
            'display_id': 'happy-national-rachel-day-',
            'timestamp': 1704333938,
            'uploader': 'Devonanustart',
            'channel_id': '4',
            'duration': 13175,
            'cast': ['Devonanustart'],
            'description': '',
            'categories': ['prank call'],
            'upload_date': '20240104'
        }
    }, {
        'url': 'https://prankcast.com/despicabledogs/posts/6217-jake-the-work-crow-',
        'info_dict': {
            'id': '6217',
            'ext': 'mp3',
            'title': 'Jake the Work Crow!',
            'display_id': 'jake-the-work-crow-',
            'timestamp': 1704346592,
            'uploader': 'despicabledogs',
            'channel_id': '957',
            'duration': 263.287,
            'cast': ['despicabledogs'],
            'description': 'https://imgur.com/a/vtxLvKU',
            'categories': [],
            'upload_date': '20240104'
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for a single post.

        The post record is read from the page data returned by
        `_search_nextjs_data` (props -> pageProps -> ssr_data_posts); the
        first entry of its JSON-encoded `post_contents_json` list describes
        the media.
        """
        video_id, display_id = self._match_valid_url(url).group('id', 'display_id')

        webpage = self._download_webpage(url, video_id)
        post = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['ssr_data_posts']
        content = self._parse_json(post['post_contents_json'], video_id)[0]

        uploader = post.get('user_name')
        # 'guests_json' is itself a JSON string; anything that is not a dict is ignored.
        guests_json = traverse_obj(content, ('guests_json', {json.loads}, {dict})) or {}

        return {
            'id': video_id,
            'title': post.get('post_title') or self._og_search_title(webpage),
            'display_id': display_id,
            'url': content.get('url'),
            'timestamp': parse_iso8601(content.get('start_date') or content.get('crdate'), ' '),
            'uploader': uploader,
            'channel_id': str_or_none(post.get('user_id')),
            'duration': float_or_none(content.get('duration')),
            'cast': list(filter(None, [uploader] + traverse_obj(guests_json, (..., 'name')))),
            'description': post.get('post_body'),
            'categories': list(filter(None, [content.get('category')])),
            # filter(None, ...) drops empty entries. The predicate must be None or
            # a callable: a string predicate such as '' raises TypeError as soon
            # as the iterator is consumed, so no tag list could ever be built.
            'tags': try_call(lambda: list(filter(None, post['post_tags'].split(',')))),
            'subtitles': {
                'live_chat': [{
                    'url': f'https://prankcast.com/api/private/chat/select-broadcast?id={post["content_id"]}&cache=',
                    'ext': 'json',
                }],
            } if post.get('content_id') else None
        }
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import base64
|
||||
import random
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -11,6 +12,7 @@ from ..utils import (
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class RadikoBaseIE(InfoExtractor):
|
||||
@@ -159,6 +161,12 @@ class RadikoBaseIE(InfoExtractor):
|
||||
|
||||
return formats
|
||||
|
||||
def _extract_performers(self, prog):
    """Return the programme's performers as one comma-separated string.

    Each 'pfm' text value found in `prog` is split on the separator
    characters in the pattern below and every piece is stripped.
    Returns None when the joined result is empty.
    """
    def split_names(text):
        return re.split(r'[//、 ,,]', text)

    names = traverse_obj(prog, ('pfm/text()', ..., {split_names}, ..., {str.strip}))
    # TODO: change 'artist' fields to 'artists' and return traversal list instead of str
    joined = ', '.join(names)
    return joined if joined else None
|
||||
|
||||
|
||||
class RadikoIE(RadikoBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<id>\d+)'
|
||||
@@ -186,10 +194,12 @@ class RadikoIE(RadikoBaseIE):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': try_call(lambda: prog.find('title').text),
|
||||
'artist': self._extract_performers(prog),
|
||||
'description': clean_html(try_call(lambda: prog.find('info').text)),
|
||||
'uploader': try_call(lambda: station_program.find('.//name').text),
|
||||
'uploader_id': station,
|
||||
'timestamp': vid_int,
|
||||
'duration': try_call(lambda: unified_timestamp(radio_end, False) - unified_timestamp(radio_begin, False)),
|
||||
'is_live': True,
|
||||
'formats': self._extract_formats(
|
||||
video_id=video_id, station=station, is_onair=False,
|
||||
@@ -243,6 +253,7 @@ class RadikoRadioIE(RadikoBaseIE):
|
||||
return {
|
||||
'id': station,
|
||||
'title': title,
|
||||
'artist': self._extract_performers(prog),
|
||||
'description': description,
|
||||
'uploader': station_name,
|
||||
'uploader_id': station,
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
@@ -91,7 +92,7 @@ class RaiBaseIE(InfoExtractor):
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
|
||||
|
||||
if not audio_only and not is_live:
|
||||
formats.extend(self._create_http_urls(media_url, relinker_url, formats))
|
||||
formats.extend(self._create_http_urls(media_url, relinker_url, formats, video_id))
|
||||
|
||||
return filter_dict({
|
||||
'is_live': is_live,
|
||||
@@ -99,7 +100,7 @@ class RaiBaseIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
def _create_http_urls(self, manifest_url, relinker_url, fmts):
|
||||
def _create_http_urls(self, manifest_url, relinker_url, fmts, video_id):
|
||||
_MANIFEST_REG = r'/(?P<id>\w+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4)?(?:\.csmil)?/playlist\.m3u8'
|
||||
_MP4_TMPL = '%s&overrideUserAgentRule=mp4-%s'
|
||||
_QUALITY = {
|
||||
@@ -166,6 +167,14 @@ class RaiBaseIE(InfoExtractor):
|
||||
'fps': 25,
|
||||
}
|
||||
|
||||
# Check if MP4 download is available
|
||||
try:
|
||||
self._request_webpage(
|
||||
HEADRequest(_MP4_TMPL % (relinker_url, '*')), video_id, 'Checking MP4 availability')
|
||||
except ExtractorError as e:
|
||||
self.to_screen(f'{video_id}: MP4 direct download is not available: {e.cause}')
|
||||
return []
|
||||
|
||||
# filter out single-stream formats
|
||||
fmts = [f for f in fmts
|
||||
if not f.get('vcodec') == 'none' and not f.get('acodec') == 'none']
|
||||
|
||||
135
yt_dlp/extractor/redge.py
Normal file
135
yt_dlp/extractor/redge.py
Normal file
@@ -0,0 +1,135 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_qs,
|
||||
update_url_query,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class RedCDNLivxIE(InfoExtractor):
    # Handles `.livx` transmissions served from redcdn.pl / atmcdn.pl hosts.
    #
    # Known methods (first in url path):
    # - `livedash` - DASH MPD
    # - `livehls` - HTTP Live Streaming
    # - `livess` - IIS Smooth Streaming
    # - `nvr` - CCTV mode, directly returns a file, typically flv, avc1, aac
    # - `sc` - shoutcast/icecast (audio streams, like radio)

    _VALID_URL = r'https?://[^.]+\.(?:dcs\.redcdn|atmcdn)\.pl/(?:live(?:dash|hls|ss)|nvr)/o2/(?P<tenant>[^/?#]+)/(?P<id>[^?#]+)\.livx'
    IE_NAME = 'redcdnlivx'

    _TESTS = [{
        'url': 'https://r.dcs.redcdn.pl/livedash/o2/senat/ENC02/channel.livx?indexMode=true&startTime=638272860000&stopTime=638292544000',
        'info_dict': {
            'id': 'ENC02-638272860000-638292544000',
            'ext': 'mp4',
            'title': 'ENC02',
            'duration': 19683.982,
            'live_status': 'was_live',
        },
    }, {
        'url': 'https://r.dcs.redcdn.pl/livedash/o2/sejm/ENC18/live.livx?indexMode=true&startTime=722333096000&stopTime=722335562000',
        'info_dict': {
            'id': 'ENC18-722333096000-722335562000',
            'ext': 'mp4',
            'title': 'ENC18',
            'duration': 2463.995,
            'live_status': 'was_live',
        },
    }, {
        'url': 'https://r.dcs.redcdn.pl/livehls/o2/sportevolution/live/triathlon2018/warsaw.livx/playlist.m3u8?startTime=550305000000&stopTime=550327620000',
        'info_dict': {
            'id': 'triathlon2018-warsaw-550305000000-550327620000',
            'ext': 'mp4',
            'title': 'triathlon2018/warsaw',
            'duration': 22619.98,
            'live_status': 'was_live',
        },
    }, {
        'url': 'https://n-25-12.dcs.redcdn.pl/nvr/o2/sejm/Migacz-ENC01/1.livx?startTime=722347200000&stopTime=722367345000',
        'only_matching': True,
    }, {
        'url': 'https://redir.atmcdn.pl/nvr/o2/sejm/ENC08/1.livx?startTime=503831270000&stopTime=503840040000',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        tenant, path = mobj.group('tenant'), mobj.group('id')
        query = parse_qs(url)
        start_time = traverse_obj(query, ('startTime', 0, {int_or_none}))
        stop_time = traverse_obj(query, ('stopTime', 0, {int_or_none}))

        # Only these two delivery methods need something appended after `.livx`
        mode_suffixes = {
            'livess': '/manifest',
            'livehls': '/playlist.m3u8',
        }

        def livx_mode(mode):
            # Rebuild the URL of this transmission for the given delivery method,
            # carrying over the requested time window
            file_qs = {}
            if start_time:
                file_qs['startTime'] = start_time
            if stop_time:
                file_qs['stopTime'] = stop_time
            if mode == 'nvr':
                file_qs['nolimit'] = 1
            elif mode != 'sc':
                file_qs['indexMode'] = 'true'
            suffix = mode_suffixes.get(mode, '')
            return update_url_query(f'https://r.dcs.redcdn.pl/{mode}/o2/{tenant}/{path}.livx{suffix}', file_qs)

        # no id or title for a transmission. making ones up.
        title = path
        for generic_part in ('/live', 'live/', '/channel', 'channel/'):
            title = title.replace(generic_part, '')
        title = title.strip('/')
        video_id = join_nonempty(title.replace('/', '-'), start_time, stop_time)

        formats = []
        ism_doc = None
        # downloading the manifest separately here instead of _extract_ism_formats to also get some stream metadata
        ism_res = self._download_xml_handle(
            livx_mode('livess'), video_id,
            note='Downloading ISM manifest',
            errnote='Failed to download ISM manifest',
            fatal=False)
        if ism_res is not False:
            ism_doc, ism_urlh = ism_res
            formats, _ = self._parse_ism_formats_and_subtitles(ism_doc, ism_urlh.url, 'ss')

        # Any HTTP status is accepted here so that only a plain 200 adds the direct file format
        nvr_urlh = self._request_webpage(
            HEADRequest(livx_mode('nvr')), video_id, 'Follow flv file redirect', fatal=False,
            expected_status=lambda _: True)
        if nvr_urlh and nvr_urlh.status == 200:
            formats.append({
                'url': nvr_urlh.url,
                'ext': 'flv',
                'format_id': 'direct-0',
                'preference': -1,  # might be slow
            })

        formats.extend(self._extract_mpd_formats(livx_mode('livedash'), video_id, mpd_id='dash', fatal=False))
        formats.extend(self._extract_m3u8_formats(
            livx_mode('livehls'), video_id, m3u8_id='hls', ext='mp4', fatal=False))

        # Duration in the manifest is expressed in TimeScale units
        time_scale = traverse_obj(ism_doc, ('@TimeScale', {int_or_none})) or 10000000
        duration = traverse_obj(
            ism_doc, ('@Duration', {functools.partial(float_or_none, scale=time_scale)})) or None

        if traverse_obj(ism_doc, '@IsLive') == 'TRUE':
            live_status = 'is_live'
        elif duration:
            live_status = 'was_live'
        else:
            live_status = None

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'duration': duration,
            'live_status': live_status,
        }
|
||||
@@ -7,11 +7,12 @@ from ..utils import (
|
||||
str_to_int,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class RedTubeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com(?:\.br)?/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
|
||||
_EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)']
|
||||
_TESTS = [{
|
||||
'url': 'https://www.redtube.com/38864951',
|
||||
@@ -34,6 +35,9 @@ class RedTubeIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://it.redtube.com/66418',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.redtube.com.br/103224331',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -79,7 +83,7 @@ class RedTubeIE(InfoExtractor):
|
||||
'media definitions', default='{}'),
|
||||
video_id, fatal=False)
|
||||
for media in medias if isinstance(medias, list) else []:
|
||||
format_url = url_or_none(media.get('videoUrl'))
|
||||
format_url = urljoin('https://www.redtube.com', media.get('videoUrl'))
|
||||
if not format_url:
|
||||
continue
|
||||
format_id = media.get('format')
|
||||
|
||||
@@ -247,17 +247,17 @@ class MujRozhlasIE(RozhlasBaseIE):
|
||||
'url': 'https://www.mujrozhlas.cz/vykopavky/ach-jo-zase-teleci-rizek-je-mnohem-min-cesky-nez-jsme-si-mysleli',
|
||||
'md5': '6f8fd68663e64936623e67c152a669e0',
|
||||
'info_dict': {
|
||||
'id': '10739193',
|
||||
'id': '10787730',
|
||||
'ext': 'mp3',
|
||||
'title': 'Ach jo, zase to telecí! Řízek je mnohem míň český, než jsme si mysleli',
|
||||
'description': 'md5:db7141e9caaedc9041ec7cefb9a62908',
|
||||
'timestamp': 1684915200,
|
||||
'modified_timestamp': 1684922446,
|
||||
'modified_timestamp': 1687550432,
|
||||
'series': 'Vykopávky',
|
||||
'thumbnail': 'https://portal.rozhlas.cz/sites/default/files/images/84377046610af6ddc54d910b1dd7a22b.jpg',
|
||||
'channel_id': 'radio-wave',
|
||||
'upload_date': '20230524',
|
||||
'modified_date': '20230524',
|
||||
'modified_date': '20230623',
|
||||
},
|
||||
}, {
|
||||
# serial extraction
|
||||
@@ -277,6 +277,26 @@ class MujRozhlasIE(RozhlasBaseIE):
|
||||
'title': 'Nespavci',
|
||||
'description': 'md5:c430adcbf9e2b9eac88b745881e814dc',
|
||||
},
|
||||
}, {
|
||||
# serialPart
|
||||
'url': 'https://www.mujrozhlas.cz/povidka/gustavo-adolfo-becquer-hora-duchu',
|
||||
'info_dict': {
|
||||
'id': '8889035',
|
||||
'ext': 'm4a',
|
||||
'title': 'Gustavo Adolfo Bécquer: Hora duchů',
|
||||
'description': 'md5:343a15257b376c276e210b78e900ffea',
|
||||
'chapter': 'Hora duchů a Polibek – dva tajemné příběhy Gustava Adolfa Bécquera',
|
||||
'thumbnail': 'https://portal.rozhlas.cz/sites/default/files/images/2adfe1387fb140634be725c1ccf26214.jpg',
|
||||
'timestamp': 1708173000,
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
'series': 'Povídka',
|
||||
'modified_date': '20240217',
|
||||
'upload_date': '20240217',
|
||||
'modified_timestamp': 1708173198,
|
||||
'channel_id': 'vltava',
|
||||
},
|
||||
'params': {'skip_download': 'dash'},
|
||||
}]
|
||||
|
||||
def _call_api(self, path, item_id, msg='API JSON'):
|
||||
@@ -322,7 +342,7 @@ class MujRozhlasIE(RozhlasBaseIE):
|
||||
|
||||
entity = info['siteEntityBundle']
|
||||
|
||||
if entity == 'episode':
|
||||
if entity in ('episode', 'serialPart'):
|
||||
return self._extract_audio_entry(self._call_api(
|
||||
'episodes', info['contentId'], 'episode info API JSON'))
|
||||
|
||||
|
||||
@@ -9,7 +9,6 @@ from ..utils import (
|
||||
get_element_html_by_class,
|
||||
get_elements_by_class,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_count,
|
||||
parse_duration,
|
||||
unescapeHTML,
|
||||
@@ -18,10 +17,10 @@ from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class Rule34VideoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?rule34video\.com/videos/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?rule34video\.com/videos?/(?P<id>\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://rule34video.com/videos/3065157/shot-it-mmd-hmv/',
|
||||
'url': 'https://rule34video.com/video/3065157/shot-it-mmd-hmv/',
|
||||
'md5': 'ffccac2c23799dabbd192621ae4d04f3',
|
||||
'info_dict': {
|
||||
'id': '3065157',
|
||||
@@ -57,7 +56,7 @@ class Rule34VideoIE(InfoExtractor):
|
||||
'comment_count': int,
|
||||
'timestamp': 1640131200,
|
||||
'description': '',
|
||||
'creator': 'WildeerStudio',
|
||||
'creators': ['WildeerStudio'],
|
||||
'upload_date': '20211222',
|
||||
'uploader': 'CerZule',
|
||||
'uploader_url': 'https://rule34video.com/members/36281/',
|
||||
@@ -81,13 +80,13 @@ class Rule34VideoIE(InfoExtractor):
|
||||
'quality': quality,
|
||||
})
|
||||
|
||||
categories, creator, uploader, uploader_url = [None] * 4
|
||||
categories, creators, uploader, uploader_url = [None] * 4
|
||||
for col in get_elements_by_class('col', webpage):
|
||||
label = clean_html(get_element_by_class('label', col))
|
||||
if label == 'Categories:':
|
||||
categories = list(map(clean_html, get_elements_by_class('item', col)))
|
||||
elif label == 'Artist:':
|
||||
creator = join_nonempty(*map(clean_html, get_elements_by_class('item', col)), delim=', ')
|
||||
creators = list(map(clean_html, get_elements_by_class('item', col)))
|
||||
elif label == 'Uploaded By:':
|
||||
uploader = clean_html(get_element_by_class('name', col))
|
||||
uploader_url = extract_attributes(get_element_html_by_class('name', col) or '').get('href')
|
||||
@@ -115,7 +114,7 @@ class Rule34VideoIE(InfoExtractor):
|
||||
'comment_count': int_or_none(self._search_regex(
|
||||
r'[^(]+\((\d+)\)', get_element_by_attribute('href', '#tab_comments', webpage), 'comment count', fatal=False)),
|
||||
'age_limit': 18,
|
||||
'creator': creator,
|
||||
'creators': creators,
|
||||
'uploader': uploader,
|
||||
'uploader_url': uploader_url,
|
||||
'categories': categories,
|
||||
|
||||
@@ -383,7 +383,7 @@ class RumbleChannelIE(InfoExtractor):
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 404:
|
||||
break
|
||||
raise
|
||||
for video_url in re.findall(r'class=video-item--a\s?href=([^>]+\.html)', webpage):
|
||||
for video_url in re.findall(r'class="[^>"]*videostream__link[^>]+href="([^"]+\.html)"', webpage):
|
||||
yield self.url_result('https://rumble.com' + video_url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -5,7 +5,10 @@ from ..utils import traverse_obj, update_url_query
|
||||
|
||||
|
||||
class ScreencastifyIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://watch\.screencastify\.com/v/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = [
|
||||
r'https?://watch\.screencastify\.com/v/(?P<id>[^/?#]+)',
|
||||
r'https?://app\.screencastify\.com/v[23]/watch/(?P<id>[^/?#]+)',
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'https://watch.screencastify.com/v/sYVkZip3quLKhHw4Ybk8',
|
||||
'info_dict': {
|
||||
@@ -19,6 +22,21 @@ class ScreencastifyIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://app.screencastify.com/v3/watch/J5N7H11wofDN1jZUCr3t',
|
||||
'info_dict': {
|
||||
'id': 'J5N7H11wofDN1jZUCr3t',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'Scott Piesen',
|
||||
'description': '',
|
||||
'title': 'Lesson Recording 1-17 Burrr...',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://app.screencastify.com/v2/watch/BQ26VbUdfbQLhKzkktOk',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
218
yt_dlp/extractor/sejmpl.py
Normal file
218
yt_dlp/extractor/sejmpl.py
Normal file
@@ -0,0 +1,218 @@
|
||||
import datetime
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .redge import RedCDNLivxIE
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
strip_or_none,
|
||||
update_url_query,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
def is_dst(date):
    """Tell whether a naive datetime lies inside the summer-time window of its year.

    The window starts at 02:00 on the last Sunday of March and ends at 03:00
    on the last Sunday of October; both boundaries are inclusive.
    """
    def last_sunday(month):
        # Both March and October have 31 days
        month_end = datetime.datetime(date.year, month, 31)
        # isoweekday() is 7 on Sunday, so `% 7` gives the days elapsed since the latest Sunday
        return month_end - datetime.timedelta(days=month_end.isoweekday() % 7)

    window_start = last_sunday(3).replace(hour=2)
    window_end = last_sunday(10).replace(hour=3)
    return window_start <= date <= window_end
|
||||
|
||||
|
||||
def rfc3339_to_atende(date):
    """Convert an ISO-formatted date string into an integer millisecond timestamp.

    The result counts milliseconds from 978307200 seconds after the Unix epoch
    (2001-01-01T00:00:00 UTC). Dates for which is_dst() is true are moved one
    hour forward before the conversion.
    """
    moment = datetime.datetime.fromisoformat(date)
    if is_dst(moment):
        moment = moment + datetime.timedelta(hours=1)
    # NOTE(review): for a naive datetime, timestamp() interprets the value in the
    # local timezone of the machine - confirm this is the intended behaviour
    return int((moment.timestamp() - 978307200) * 1000)
|
||||
|
||||
|
||||
class SejmIE(InfoExtractor):
    # Builds a playlist with one entry per camera (plus the sign language
    # interpreter stream when present) for a single Sejm transmission.
    _VALID_URL = (
        r'https?://(?:www\.)?sejm\.gov\.pl/[Ss]ejm(?P<term>\d+)\.nsf/transmisje(?:_arch)?\.xsp(?:\?[^#]*)?#(?P<id>[\dA-F]+)',
        r'https?://(?:www\.)?sejm\.gov\.pl/[Ss]ejm(?P<term>\d+)\.nsf/transmisje(?:_arch)?\.xsp\?(?:[^#]+&)?unid=(?P<id>[\dA-F]+)',
        r'https?://sejm-embed\.redcdn\.pl/[Ss]ejm(?P<term>\d+)\.nsf/VideoFrame\.xsp/(?P<id>[\dA-F]+)',
    )
    IE_NAME = 'sejm'

    _TESTS = [{
        # multiple cameras, Polish sign language interpreter
        'url': 'https://www.sejm.gov.pl/Sejm10.nsf/transmisje_arch.xsp#6181EF1AD9CEEBB5C1258A6D006452B5',
        'info_dict': {
            'id': '6181EF1AD9CEEBB5C1258A6D006452B5',
            'title': '1. posiedzenie Sejmu X kadencji',
            'duration': 20145,
            'live_status': 'was_live',
            'location': 'Sala Posiedzeń',
        },
        'playlist': [{
            'info_dict': {
                'id': 'ENC01-722340000000-722360145000',
                'ext': 'mp4',
                'duration': 20145,
                'title': '1. posiedzenie Sejmu X kadencji - ENC01',
                'live_status': 'was_live',
            },
        }, {
            'info_dict': {
                'id': 'ENC30-722340000000-722360145000',
                'ext': 'mp4',
                'duration': 20145,
                'title': '1. posiedzenie Sejmu X kadencji - ENC30',
                'live_status': 'was_live',
            },
        }, {
            'info_dict': {
                'id': 'ENC31-722340000000-722360145000',
                'ext': 'mp4',
                'duration': 20145,
                'title': '1. posiedzenie Sejmu X kadencji - ENC31',
                'live_status': 'was_live',
            },
        }, {
            'info_dict': {
                'id': 'ENC32-722340000000-722360145000',
                'ext': 'mp4',
                'duration': 20145,
                'title': '1. posiedzenie Sejmu X kadencji - ENC32',
                'live_status': 'was_live',
            },
        }, {
            # sign language interpreter
            'info_dict': {
                'id': 'Migacz-ENC01-1-722340000000-722360145000',
                'ext': 'mp4',
                'duration': 20145,
                'title': '1. posiedzenie Sejmu X kadencji - Migacz-ENC01',
                'live_status': 'was_live',
            },
        }],
    }, {
        'url': 'https://www.sejm.gov.pl/Sejm8.nsf/transmisje.xsp?unid=9377A9D65518E9A5C125808E002E9FF2',
        'info_dict': {
            'id': '9377A9D65518E9A5C125808E002E9FF2',
            'title': 'Debata "Lepsza Polska: obywatelska"',
            'description': 'KP .Nowoczesna',
            'duration': 8770,
            'live_status': 'was_live',
            'location': 'sala kolumnowa im. Kazimierza Pużaka (bud. C-D)',
        },
        'playlist': [{
            'info_dict': {
                'id': 'ENC08-1-503831270000-503840040000',
                'ext': 'mp4',
                'duration': 8770,
                'title': 'Debata "Lepsza Polska: obywatelska" - ENC08',
                'live_status': 'was_live',
            },
        }],
    }, {
        # the 7th term is a special case: it does not use redcdn livx
        'url': 'https://www.sejm.gov.pl/sejm7.nsf/transmisje_arch.xsp?rok=2015&month=11#A6E6D475ECCC6FE5C1257EF90034817F',
        'info_dict': {
            'id': 'A6E6D475ECCC6FE5C1257EF90034817F',
            'title': 'Konferencja prasowa - Stanowisko SLD ws. składu nowego rządu',
            'description': 'SLD - Biuro Prasowe Klubu',
            'duration': 514,
            'location': 'sala 101/bud. C',
            'live_status': 'was_live',
        },
        'playlist': [{
            'info_dict': {
                'id': 'A6E6D475ECCC6FE5C1257EF90034817F',
                'ext': 'mp4',
                'title': 'Konferencja prasowa - Stanowisko SLD ws. składu nowego rządu',
                'duration': 514,
            },
        }],
    }, {
        'url': 'https://sejm-embed.redcdn.pl/Sejm10.nsf/VideoFrame.xsp/FED58EABB97FBD53C1258A7400386492',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        term, video_id = mobj.group('term'), mobj.group('id')
        frame = self._download_webpage(
            f'https://sejm-embed.redcdn.pl/Sejm{term}.nsf/VideoFrame.xsp/{video_id}',
            video_id)
        # the endpoint is named "transmisje_arch", yet it serves live streams as well
        data = self._download_json(
            f'https://www.sejm.gov.pl/Sejm{term}.nsf/transmisje_arch.xsp/json/{video_id}',
            video_id)
        params = data['params']

        title = strip_or_none(data.get('title'))

        status = data.get('status')
        if status == 'VIDEO_ENDED':
            live_status = 'was_live'
        elif status == 'VIDEO_PLAYING':
            live_status = 'is_live'
        else:
            live_status = None
            self.report_warning(f'unknown status: {data.get("status")}')

        start_time = rfc3339_to_atende(params['start'])
        # current streams have a stop time of *expected* end of session, but actual times
        # can change during the transmission. setting a stop_time would artificially
        # end the stream at that time, while the session actually keeps going.
        stop_time = duration = None
        if live_status == 'was_live':
            stop_time = rfc3339_to_atende(params['stop'])
            duration = (stop_time - start_time) // 1000

        entries = []

        def add_entry(file, legacy_file=False):
            # Append one playlist entry for a stream URL; empty URLs are ignored
            if not file:
                return
            file = self._proto_relative_url(file)

            if legacy_file:
                # direct media file: no time window parameters and no livx stream id
                entries.append({
                    'url': file,
                    'duration': duration,
                    'id': video_id,
                    'title': title,
                })
                return

            file = update_url_query(file, {'startTime': start_time})
            if stop_time is not None:
                file = update_url_query(file, {'stopTime': stop_time})
            stream_id = self._search_regex(r'/o2/sejm/([^/]+)/[^./]+\.livx', file, 'stream id')
            entries.append({
                'url': file,
                'duration': duration,
                '_type': 'url_transparent',
                'ie_key': RedCDNLivxIE.ie_key(),
                'id': stream_id,
                'title': join_nonempty(title, stream_id, delim=' - '),
            })

        cameras = self._search_json(
            r'var\s+cameras\s*=', frame, 'camera list', video_id,
            contains_pattern=r'\[(?s:.+)\]', transform_source=js_to_json,
            fatal=False) or []
        for streams in traverse_obj(cameras, (..., 'file', {dict})):
            flv_url = streams.get('flv')
            mp4_url = streams.get('mp4')
            if flv_url:
                add_entry(flv_url)
            elif mp4_url:
                # this is only a thing in 7th term. no streams before, and starting 8th it's redcdn livx
                add_entry(mp4_url, legacy_file=True)
            else:
                self.report_warning('Unknown camera stream type found')

        if params.get('mig'):
            sli_url = self._search_regex(
                r"var sliUrl\s*=\s*'([^']+)'", frame, 'sign language interpreter url', fatal=False)
            add_entry(sli_url)

        return {
            '_type': 'playlist',
            'entries': entries,
            'id': video_id,
            'title': title,
            'description': clean_html(data.get('desc')) or None,
            'duration': duration,
            'live_status': live_status,
            'location': strip_or_none(data.get('location')),
        }
|
||||
@@ -7,8 +7,6 @@ from ..utils import (
|
||||
determine_ext,
|
||||
dict_get,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
@@ -388,15 +386,55 @@ class SVTSeriesIE(SVTPlayBaseIE):
|
||||
dict_get(series, ('longDescription', 'shortDescription')))
|
||||
|
||||
|
||||
class SVTPageIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?svt\.se/(?P<path>(?:[^/]+/)*(?P<id>[^/?&#]+))'
|
||||
class SVTPageIE(SVTBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?svt\.se/(?:[^/?#]+/)*(?P<id>[^/?&#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.svt.se/nyheter/lokalt/skane/viktor-18-forlorade-armar-och-ben-i-sepsis-vill-ateruppta-karaten-och-bli-svetsare',
|
||||
'info_dict': {
|
||||
'title': 'Viktor, 18, förlorade armar och ben i sepsis – vill återuppta karaten och bli svetsare',
|
||||
'id': 'viktor-18-forlorade-armar-och-ben-i-sepsis-vill-ateruppta-karaten-och-bli-svetsare',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
'url': 'https://www.svt.se/nyheter/lokalt/skane/forsvarsmakten-om-trafikkaoset-pa-e22-kunde-inte-varit-dar-snabbare',
|
||||
'info_dict': {
|
||||
'id': 'jXvk42E',
|
||||
'title': 'Försvarsmakten om trafikkaoset på E22: Kunde inte varit där snabbare',
|
||||
'ext': 'mp4',
|
||||
"duration": 80,
|
||||
'age_limit': 0,
|
||||
'timestamp': 1704370009,
|
||||
'episode': 'Försvarsmakten om trafikkaoset på E22: Kunde inte varit där snabbare',
|
||||
'series': 'Lokala Nyheter Skåne',
|
||||
'upload_date': '20240104'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.svt.se/nyheter/svtforum/2023-tungt-ar-for-svensk-media',
|
||||
'info_dict': {
|
||||
'title': '2023 tungt år för svensk media',
|
||||
'id': 'ewqAZv4',
|
||||
'ext': 'mp4',
|
||||
"duration": 3074,
|
||||
'age_limit': 0,
|
||||
'series': '',
|
||||
'timestamp': 1702980479,
|
||||
'upload_date': '20231219',
|
||||
'episode': 'Mediestudier'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.svt.se/sport/ishockey/bakom-masken-lehners-kamp-mot-mental-ohalsa',
|
||||
'info_dict': {
|
||||
'id': '25298267',
|
||||
'title': 'Bakom masken – Lehners kamp mot mental ohälsa',
|
||||
},
|
||||
'playlist_count': 4,
|
||||
'skip': 'Video is gone'
|
||||
}, {
|
||||
'url': 'https://www.svt.se/nyheter/utrikes/svenska-andrea-ar-en-mil-fran-branderna-i-kalifornien',
|
||||
'info_dict': {
|
||||
@@ -404,6 +442,7 @@ class SVTPageIE(InfoExtractor):
|
||||
'title': 'Svenska Andrea redo att fly sitt hem i Kalifornien',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'skip': 'Video is gone'
|
||||
}, {
|
||||
# only programTitle
|
||||
'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
|
||||
@@ -414,6 +453,7 @@ class SVTPageIE(InfoExtractor):
|
||||
'duration': 27,
|
||||
'age_limit': 0,
|
||||
},
|
||||
'skip': 'Video is gone'
|
||||
}, {
|
||||
'url': 'https://www.svt.se/nyheter/lokalt/vast/svt-testar-tar-nagon-upp-skrapet-1',
|
||||
'only_matching': True,
|
||||
@@ -427,26 +467,23 @@ class SVTPageIE(InfoExtractor):
|
||||
return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTPageIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
path, display_id = self._match_valid_url(url).groups()
|
||||
display_id = self._match_id(url)
|
||||
|
||||
article = self._download_json(
|
||||
'https://api.svt.se/nss-api/page/' + path, display_id,
|
||||
query={'q': 'articles'})['articles']['content'][0]
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
title = self._og_search_title(webpage)
|
||||
|
||||
entries = []
|
||||
urql_state = self._search_json(
|
||||
r'window\.svt\.nyh\.urqlState\s*=', webpage, 'json data', display_id)
|
||||
|
||||
def _process_content(content):
|
||||
if content.get('_type') in ('VIDEOCLIP', 'VIDEOEPISODE'):
|
||||
video_id = compat_str(content['image']['svtId'])
|
||||
entries.append(self.url_result(
|
||||
'svt:' + video_id, SVTPlayIE.ie_key(), video_id))
|
||||
data = traverse_obj(urql_state, (..., 'data', {str}, {json.loads}), get_all=False) or {}
|
||||
|
||||
for media in article.get('media', []):
|
||||
_process_content(media)
|
||||
def entries():
|
||||
for video_id in set(traverse_obj(data, (
|
||||
'page', (('topMedia', 'svtId'), ('body', ..., 'video', 'svtId')), {str}
|
||||
))):
|
||||
info = self._extract_video(
|
||||
self._download_json(f'https://api.svt.se/video/{video_id}', video_id), video_id)
|
||||
info['title'] = title
|
||||
yield info
|
||||
|
||||
for obj in article.get('structuredBody', []):
|
||||
_process_content(obj.get('content') or {})
|
||||
|
||||
return self.playlist_result(
|
||||
entries, str_or_none(article.get('id')),
|
||||
strip_or_none(article.get('title')))
|
||||
return self.playlist_result(entries(), display_id, title)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none, traverse_obj
|
||||
from ..utils import ExtractorError, int_or_none, traverse_obj
|
||||
|
||||
|
||||
class SwearnetEpisodeIE(InfoExtractor):
|
||||
@@ -51,7 +51,13 @@ class SwearnetEpisodeIE(InfoExtractor):
|
||||
display_id, season_number, episode_number = self._match_valid_url(url).group('id', 'season_num', 'episode_num')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
external_id = self._search_regex(r'externalid\s*=\s*"([^"]+)', webpage, 'externalid')
|
||||
try:
|
||||
external_id = self._search_regex(r'externalid\s*=\s*"([^"]+)', webpage, 'externalid')
|
||||
except ExtractorError:
|
||||
if 'Upgrade Now' in webpage:
|
||||
self.raise_login_required()
|
||||
raise
|
||||
|
||||
json_data = self._download_json(
|
||||
f'https://play.vidyard.com/player/{external_id}.json', display_id)['payload']['chapters'][0]
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ import string
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote, compat_urllib_parse_urlparse
|
||||
from ..compat import compat_urllib_parse_urlparse
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
@@ -15,7 +15,6 @@ from ..utils import (
|
||||
UserNotLive,
|
||||
determine_ext,
|
||||
format_field,
|
||||
get_first,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
merge_dicts,
|
||||
@@ -219,8 +218,8 @@ class TikTokBaseIE(InfoExtractor):
|
||||
def extract_addr(addr, add_meta={}):
|
||||
parsed_meta, res = parse_url_key(addr.get('url_key', ''))
|
||||
if res:
|
||||
known_resolutions.setdefault(res, {}).setdefault('height', add_meta.get('height') or addr.get('height'))
|
||||
known_resolutions[res].setdefault('width', add_meta.get('width') or addr.get('width'))
|
||||
known_resolutions.setdefault(res, {}).setdefault('height', int_or_none(addr.get('height')))
|
||||
known_resolutions[res].setdefault('width', int_or_none(addr.get('width')))
|
||||
parsed_meta.update(known_resolutions.get(res, {}))
|
||||
add_meta.setdefault('height', int_or_none(res[:-1]))
|
||||
return [{
|
||||
@@ -237,22 +236,26 @@ class TikTokBaseIE(InfoExtractor):
|
||||
|
||||
# Hack: Add direct video links first to prioritize them when removing duplicate formats
|
||||
formats = []
|
||||
width = int_or_none(video_info.get('width'))
|
||||
height = int_or_none(video_info.get('height'))
|
||||
if video_info.get('play_addr'):
|
||||
formats.extend(extract_addr(video_info['play_addr'], {
|
||||
'format_id': 'play_addr',
|
||||
'format_note': 'Direct video',
|
||||
'vcodec': 'h265' if traverse_obj(
|
||||
video_info, 'is_bytevc1', 'is_h265') else 'h264', # TODO: Check for "direct iOS" videos, like https://www.tiktok.com/@cookierun_dev/video/7039716639834656002
|
||||
'width': video_info.get('width'),
|
||||
'height': video_info.get('height'),
|
||||
'width': width,
|
||||
'height': height,
|
||||
}))
|
||||
if video_info.get('download_addr'):
|
||||
formats.extend(extract_addr(video_info['download_addr'], {
|
||||
download_addr = video_info['download_addr']
|
||||
dl_width = int_or_none(download_addr.get('width'))
|
||||
formats.extend(extract_addr(download_addr, {
|
||||
'format_id': 'download_addr',
|
||||
'format_note': 'Download video%s' % (', watermarked' if video_info.get('has_watermark') else ''),
|
||||
'vcodec': 'h264',
|
||||
'width': video_info.get('width'),
|
||||
'height': video_info.get('height'),
|
||||
'width': dl_width or width,
|
||||
'height': try_call(lambda: int(dl_width / 0.5625)) or height, # download_addr['height'] is wrong
|
||||
'preference': -2 if video_info.get('has_watermark') else -1,
|
||||
}))
|
||||
if video_info.get('play_addr_h264'):
|
||||
@@ -315,9 +318,6 @@ class TikTokBaseIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': aweme_id,
|
||||
'extractor_key': TikTokIE.ie_key(),
|
||||
'extractor': TikTokIE.IE_NAME,
|
||||
'webpage_url': self._create_url(author_info.get('uid'), aweme_id),
|
||||
**traverse_obj(aweme_detail, {
|
||||
'title': ('desc', {str}),
|
||||
'description': ('desc', {str}),
|
||||
@@ -921,20 +921,23 @@ class DouyinIE(TikTokBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?douyin\.com/video/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.douyin.com/video/6961737553342991651',
|
||||
'md5': 'a97db7e3e67eb57bf40735c022ffa228',
|
||||
'md5': '9ecce7bc5b302601018ecb2871c63a75',
|
||||
'info_dict': {
|
||||
'id': '6961737553342991651',
|
||||
'ext': 'mp4',
|
||||
'title': '#杨超越 小小水手带你去远航❤️',
|
||||
'description': '#杨超越 小小水手带你去远航❤️',
|
||||
'uploader': '6897520xka',
|
||||
'uploader_id': '110403406559',
|
||||
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
||||
'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
||||
'creator': '杨超越',
|
||||
'duration': 19782,
|
||||
'creators': ['杨超越'],
|
||||
'duration': 19,
|
||||
'timestamp': 1620905839,
|
||||
'upload_date': '20210513',
|
||||
'track': '@杨超越创作的原声',
|
||||
'artists': ['杨超越'],
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
@@ -943,20 +946,23 @@ class DouyinIE(TikTokBaseIE):
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.douyin.com/video/6982497745948921092',
|
||||
'md5': '34a87ebff3833357733da3fe17e37c0e',
|
||||
'md5': '15c5e660b7048af3707304e3cc02bbb5',
|
||||
'info_dict': {
|
||||
'id': '6982497745948921092',
|
||||
'ext': 'mp4',
|
||||
'title': '这个夏日和小羊@杨超越 一起遇见白色幻想',
|
||||
'description': '这个夏日和小羊@杨超越 一起遇见白色幻想',
|
||||
'uploader': '0731chaoyue',
|
||||
'uploader_id': '408654318141572',
|
||||
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA',
|
||||
'channel_id': 'MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA',
|
||||
'creator': '杨超越工作室',
|
||||
'duration': 42479,
|
||||
'creators': ['杨超越工作室'],
|
||||
'duration': 42,
|
||||
'timestamp': 1625739481,
|
||||
'upload_date': '20210708',
|
||||
'track': '@杨超越工作室创作的原声',
|
||||
'artists': ['杨超越工作室'],
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
@@ -965,20 +971,23 @@ class DouyinIE(TikTokBaseIE):
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.douyin.com/video/6953975910773099811',
|
||||
'md5': 'dde3302460f19db59c47060ff013b902',
|
||||
'md5': '0e6443758b8355db9a3c34864a4276be',
|
||||
'info_dict': {
|
||||
'id': '6953975910773099811',
|
||||
'ext': 'mp4',
|
||||
'title': '#一起看海 出现在你的夏日里',
|
||||
'description': '#一起看海 出现在你的夏日里',
|
||||
'uploader': '6897520xka',
|
||||
'uploader_id': '110403406559',
|
||||
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
||||
'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
||||
'creator': '杨超越',
|
||||
'duration': 17343,
|
||||
'creators': ['杨超越'],
|
||||
'duration': 17,
|
||||
'timestamp': 1619098692,
|
||||
'upload_date': '20210422',
|
||||
'track': '@杨超越创作的原声',
|
||||
'artists': ['杨超越'],
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
@@ -1004,20 +1013,23 @@ class DouyinIE(TikTokBaseIE):
|
||||
'skip': 'No longer available',
|
||||
}, {
|
||||
'url': 'https://www.douyin.com/video/6963263655114722595',
|
||||
'md5': 'cf9f11f0ec45d131445ec2f06766e122',
|
||||
'md5': '1440bcf59d8700f8e014da073a4dfea8',
|
||||
'info_dict': {
|
||||
'id': '6963263655114722595',
|
||||
'ext': 'mp4',
|
||||
'title': '#哪个爱豆的105度最甜 换个角度看看我哈哈',
|
||||
'description': '#哪个爱豆的105度最甜 换个角度看看我哈哈',
|
||||
'uploader': '6897520xka',
|
||||
'uploader_id': '110403406559',
|
||||
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
||||
'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
||||
'creator': '杨超越',
|
||||
'duration': 15115,
|
||||
'creators': ['杨超越'],
|
||||
'duration': 15,
|
||||
'timestamp': 1621261163,
|
||||
'upload_date': '20210517',
|
||||
'track': '@杨超越创作的原声',
|
||||
'artists': ['杨超越'],
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
@@ -1025,34 +1037,23 @@ class DouyinIE(TikTokBaseIE):
|
||||
'thumbnail': r're:https?://.+\.jpe?g',
|
||||
},
|
||||
}]
|
||||
_APP_VERSIONS = [('23.3.0', '230300')]
|
||||
_APP_NAME = 'aweme'
|
||||
_AID = 1128
|
||||
_API_HOSTNAME = 'aweme.snssdk.com'
|
||||
_UPLOADER_URL_FORMAT = 'https://www.douyin.com/user/%s'
|
||||
_WEBPAGE_HOST = 'https://www.douyin.com/'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
try:
|
||||
return self._extract_aweme_app(video_id)
|
||||
except ExtractorError as e:
|
||||
e.expected = True
|
||||
self.to_screen(f'{e}; trying with webpage')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
render_data = self._search_json(
|
||||
r'<script [^>]*\bid=[\'"]RENDER_DATA[\'"][^>]*>', webpage, 'render data', video_id,
|
||||
contains_pattern=r'%7B(?s:.+)%7D', fatal=False, transform_source=compat_urllib_parse_unquote)
|
||||
if not render_data:
|
||||
detail = traverse_obj(self._download_json(
|
||||
'https://www.douyin.com/aweme/v1/web/aweme/detail/', video_id,
|
||||
'Downloading web detail JSON', 'Failed to download web detail JSON',
|
||||
query={'aweme_id': video_id}, fatal=False), ('aweme_detail', {dict}))
|
||||
if not detail:
|
||||
# TODO: Run verification challenge code to generate signature cookies
|
||||
cookies = self._get_cookies(self._WEBPAGE_HOST)
|
||||
expected = not cookies.get('s_v_web_id') or not cookies.get('ttwid')
|
||||
raise ExtractorError(
|
||||
'Fresh cookies (not necessarily logged in) are needed', expected=expected)
|
||||
'Fresh cookies (not necessarily logged in) are needed',
|
||||
expected=not self._get_cookies(self._WEBPAGE_HOST).get('s_v_web_id'))
|
||||
|
||||
return self._parse_aweme_video_web(get_first(render_data, ('aweme', 'detail')), url, video_id)
|
||||
return self._parse_aweme_video_app(detail)
|
||||
|
||||
|
||||
class TikTokVMIE(InfoExtractor):
|
||||
|
||||
@@ -21,7 +21,7 @@ from ..utils import (
|
||||
class TVPIE(InfoExtractor):
|
||||
IE_NAME = 'tvp'
|
||||
IE_DESC = 'Telewizja Polska'
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?(?:tvp(?:parlament)?\.(?:pl|info)|tvpworld\.com|swipeto\.pl)/(?:(?!\d+/)[^/]+/)*(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?(?:tvp(?:parlament)?\.(?:pl|info)|tvpworld\.com|swipeto\.pl)/(?:(?!\d+/)[^/]+/)*(?P<id>\d+)(?:[/?#]|$)'
|
||||
|
||||
_TESTS = [{
|
||||
# TVPlayer 2 in js wrapper
|
||||
@@ -514,7 +514,7 @@ class TVPVODBaseIE(InfoExtractor):
|
||||
|
||||
class TVPVODVideoIE(TVPVODBaseIE):
|
||||
IE_NAME = 'tvp:vod'
|
||||
_VALID_URL = r'https?://vod\.tvp\.pl/[a-z\d-]+,\d+/[a-z\d-]+(?<!-odcinki)(?:-odcinki,\d+/odcinek-\d+,S\d+E\d+)?,(?P<id>\d+)(?:\?[^#]+)?(?:#.+)?$'
|
||||
_VALID_URL = r'https?://vod\.tvp\.pl/(?P<category>[a-z\d-]+,\d+)/[a-z\d-]+(?<!-odcinki)(?:-odcinki,\d+/odcinek-\d+,S\d+E\d+)?,(?P<id>\d+)/?(?:[?#]|$)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://vod.tvp.pl/dla-dzieci,24/laboratorium-alchemika-odcinki,309338/odcinek-24,S01E24,311357',
|
||||
@@ -560,12 +560,23 @@ class TVPVODVideoIE(TVPVODBaseIE):
|
||||
'thumbnail': 're:https?://.+',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://vod.tvp.pl/live,1/tvp-world,399731',
|
||||
'info_dict': {
|
||||
'id': '399731',
|
||||
'ext': 'mp4',
|
||||
'title': r're:TVP WORLD \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
|
||||
'live_status': 'is_live',
|
||||
'thumbnail': 're:https?://.+',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
category, video_id = self._match_valid_url(url).group('category', 'id')
|
||||
|
||||
info_dict = self._parse_video(self._call_api(f'vods/{video_id}', video_id), with_url=False)
|
||||
is_live = category == 'live,1'
|
||||
entity = 'lives' if is_live else 'vods'
|
||||
info_dict = self._parse_video(self._call_api(f'{entity}/{video_id}', video_id), with_url=False)
|
||||
|
||||
playlist = self._call_api(f'{video_id}/videos/playlist', video_id, query={'videoType': 'MOVIE'})
|
||||
|
||||
@@ -582,6 +593,8 @@ class TVPVODVideoIE(TVPVODBaseIE):
|
||||
'ext': 'ttml',
|
||||
})
|
||||
|
||||
info_dict['is_live'] = is_live
|
||||
|
||||
return info_dict
|
||||
|
||||
|
||||
|
||||
@@ -100,9 +100,13 @@ class TwitterBaseIE(InfoExtractor):
|
||||
if not variant_url:
|
||||
return [], {}
|
||||
elif '.m3u8' in variant_url:
|
||||
return self._extract_m3u8_formats_and_subtitles(
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
variant_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
for f in traverse_obj(fmts, lambda _, v: v['vcodec'] == 'none' and v.get('tbr') is None):
|
||||
if mobj := re.match(r'hls-[Aa]udio-(?P<bitrate>\d{4,})', f['format_id']):
|
||||
f['tbr'] = int_or_none(mobj.group('bitrate'), 1000)
|
||||
return fmts, subs
|
||||
else:
|
||||
tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
|
||||
f = {
|
||||
@@ -471,6 +475,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
|
||||
'channel_id': '549749560',
|
||||
'uploader': 'FREE THE NIPPLE',
|
||||
'uploader_id': 'freethenipple',
|
||||
'duration': 12.922,
|
||||
@@ -484,6 +489,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'age_limit': 18,
|
||||
'_old_archive_ids': ['twitter 643211948184596480'],
|
||||
},
|
||||
'skip': 'Requires authentication',
|
||||
}, {
|
||||
'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
|
||||
'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
|
||||
@@ -506,6 +512,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': r're:Star Wars.*A new beginning is coming December 18.*',
|
||||
'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
|
||||
'channel_id': '20106852',
|
||||
'uploader_id': 'starwars',
|
||||
'uploader': r're:Star Wars.*',
|
||||
'timestamp': 1447395772,
|
||||
@@ -551,6 +558,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
|
||||
'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'channel_id': '1383165541',
|
||||
'uploader': 'jaydin donte geer',
|
||||
'uploader_id': 'jaydingeer',
|
||||
'duration': 30.0,
|
||||
@@ -591,6 +599,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
|
||||
'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
|
||||
'channel_id': '701615052',
|
||||
'uploader_id': 'CaptainAmerica',
|
||||
'uploader': 'Captain America',
|
||||
'duration': 3.17,
|
||||
@@ -627,6 +636,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
|
||||
'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
|
||||
'channel_id': '2526757026',
|
||||
'uploader': 'عالم الأخبار',
|
||||
'uploader_id': 'news_al3alm',
|
||||
'duration': 277.4,
|
||||
@@ -651,6 +661,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
|
||||
'channel_id': '2319432498',
|
||||
'uploader': 'Préfet de Guadeloupe',
|
||||
'uploader_id': 'Prefet971',
|
||||
'duration': 47.48,
|
||||
@@ -677,6 +688,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'title': 're:.*?Shep is on a roll today.*?',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
|
||||
'channel_id': '255036353',
|
||||
'uploader': 'Lis Power',
|
||||
'uploader_id': 'LisPower1',
|
||||
'duration': 111.278,
|
||||
@@ -741,6 +753,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
|
||||
'channel_id': '18552281',
|
||||
'uploader': 'Brooklyn Nets',
|
||||
'uploader_id': 'BrooklynNets',
|
||||
'duration': 324.484,
|
||||
@@ -763,10 +776,11 @@ class TwitterIE(TwitterBaseIE):
|
||||
'id': '1577855447914409984',
|
||||
'display_id': '1577855540407197696',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:9d198efb93557b8f8d5b78c480407214',
|
||||
'title': 'md5:466a3a8b049b5f5a13164ce915484b51',
|
||||
'description': 'md5:b9c3699335447391d11753ab21c70a74',
|
||||
'upload_date': '20221006',
|
||||
'uploader': 'oshtru',
|
||||
'channel_id': '143077138',
|
||||
'uploader': 'Oshtru',
|
||||
'uploader_id': 'oshtru',
|
||||
'uploader_url': 'https://twitter.com/oshtru',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
@@ -784,9 +798,10 @@ class TwitterIE(TwitterBaseIE):
|
||||
'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
|
||||
'info_dict': {
|
||||
'id': '1577719286659006464',
|
||||
'title': 'Ultima - Test',
|
||||
'title': 'Ultima Reload - Test',
|
||||
'description': 'Test https://t.co/Y3KEZD7Dad',
|
||||
'uploader': 'Ultima',
|
||||
'channel_id': '168922496',
|
||||
'uploader': 'Ultima Reload',
|
||||
'uploader_id': 'UltimaShadowX',
|
||||
'uploader_url': 'https://twitter.com/UltimaShadowX',
|
||||
'upload_date': '20221005',
|
||||
@@ -808,6 +823,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'description': 'md5:95aea692fda36a12081b9629b02daa92',
|
||||
'channel_id': '1094109584',
|
||||
'uploader': 'Max Olson',
|
||||
'uploader_id': 'MesoMax919',
|
||||
'uploader_url': 'https://twitter.com/MesoMax919',
|
||||
@@ -830,6 +846,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': str,
|
||||
'description': str,
|
||||
'channel_id': '1217167793541480450',
|
||||
'uploader': str,
|
||||
'uploader_id': 'Rizdraws',
|
||||
'uploader_url': 'https://twitter.com/Rizdraws',
|
||||
@@ -840,7 +857,8 @@ class TwitterIE(TwitterBaseIE):
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'age_limit': 18,
|
||||
'tags': []
|
||||
'tags': [],
|
||||
'_old_archive_ids': ['twitter 1575199173472927762'],
|
||||
},
|
||||
'params': {'skip_download': 'The media could not be played'},
|
||||
'skip': 'Requires authentication',
|
||||
@@ -852,6 +870,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'id': '1395079556562706435',
|
||||
'title': str,
|
||||
'tags': [],
|
||||
'channel_id': '21539378',
|
||||
'uploader': str,
|
||||
'like_count': int,
|
||||
'upload_date': '20210519',
|
||||
@@ -869,6 +888,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'info_dict': {
|
||||
'id': '1578353380363501568',
|
||||
'title': str,
|
||||
'channel_id': '2195866214',
|
||||
'uploader_id': 'DavidToons_',
|
||||
'repost_count': int,
|
||||
'like_count': int,
|
||||
@@ -888,6 +908,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'id': '1578401165338976258',
|
||||
'title': str,
|
||||
'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
|
||||
'channel_id': '19338359',
|
||||
'uploader': str,
|
||||
'uploader_id': 'primevideouk',
|
||||
'timestamp': 1665155137,
|
||||
@@ -929,6 +950,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
|
||||
'comment_count': int,
|
||||
'uploader_id': 'CTVJLaidlaw',
|
||||
'channel_id': '80082014',
|
||||
'repost_count': int,
|
||||
'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
|
||||
'upload_date': '20221208',
|
||||
@@ -946,6 +968,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
|
||||
'thumbnail': r're:^https?://.+\.jpg',
|
||||
'timestamp': 1670459604.0,
|
||||
'channel_id': '80082014',
|
||||
'uploader_id': 'CTVJLaidlaw',
|
||||
'uploader': 'Jocelyn Laidlaw',
|
||||
'repost_count': int,
|
||||
@@ -972,6 +995,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'title': '뽀 - 아 최우제 이동속도 봐',
|
||||
'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
|
||||
'duration': 24.598,
|
||||
'channel_id': '1281839411068432384',
|
||||
'uploader': '뽀',
|
||||
'uploader_id': 's2FAKER',
|
||||
'uploader_url': 'https://twitter.com/s2FAKER',
|
||||
@@ -985,6 +1009,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'comment_count': int,
|
||||
'_old_archive_ids': ['twitter 1621117700482416640'],
|
||||
},
|
||||
'skip': 'Requires authentication',
|
||||
}, {
|
||||
'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
|
||||
'info_dict': {
|
||||
@@ -992,6 +1017,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'display_id': '1599108751385972737',
|
||||
'ext': 'mp4',
|
||||
'title': '\u06ea - \U0001F48B',
|
||||
'channel_id': '1347791436809441283',
|
||||
'uploader_url': 'https://twitter.com/hlo_again',
|
||||
'like_count': int,
|
||||
'uploader_id': 'hlo_again',
|
||||
@@ -1014,6 +1040,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'id': '1600009362759733248',
|
||||
'display_id': '1600009574919962625',
|
||||
'ext': 'mp4',
|
||||
'channel_id': '211814412',
|
||||
'uploader_url': 'https://twitter.com/MunTheShinobi',
|
||||
'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
|
||||
'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
|
||||
@@ -1061,6 +1088,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'display_id': '1695424220702888009',
|
||||
'title': 'md5:e8daa9527bc2b947121395494f786d9d',
|
||||
'description': 'md5:004f2d37fd58737724ec75bc7e679938',
|
||||
'channel_id': '15212187',
|
||||
'uploader': 'Benny Johnson',
|
||||
'uploader_id': 'bennyjohnson',
|
||||
'uploader_url': 'https://twitter.com/bennyjohnson',
|
||||
@@ -1084,6 +1112,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'display_id': '1695424220702888009',
|
||||
'title': 'md5:e8daa9527bc2b947121395494f786d9d',
|
||||
'description': 'md5:004f2d37fd58737724ec75bc7e679938',
|
||||
'channel_id': '15212187',
|
||||
'uploader': 'Benny Johnson',
|
||||
'uploader_id': 'bennyjohnson',
|
||||
'uploader_url': 'https://twitter.com/bennyjohnson',
|
||||
@@ -1117,7 +1146,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
},
|
||||
'add_ie': ['TwitterBroadcast'],
|
||||
}, {
|
||||
# Animated gif and quote tweet video, with syndication API
|
||||
# Animated gif and quote tweet video
|
||||
'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
|
||||
'playlist_mincount': 2,
|
||||
'info_dict': {
|
||||
@@ -1125,6 +1154,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'title': 'BAKOON - https://t.co/zom968d0a0',
|
||||
'description': 'https://t.co/zom968d0a0',
|
||||
'tags': [],
|
||||
'channel_id': '1263540390',
|
||||
'uploader': 'BAKOON',
|
||||
'uploader_id': 'BAKKOOONN',
|
||||
'uploader_url': 'https://twitter.com/BAKKOOONN',
|
||||
@@ -1132,19 +1162,21 @@ class TwitterIE(TwitterBaseIE):
|
||||
'timestamp': 1693254077.0,
|
||||
'upload_date': '20230828',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
'params': {'extractor_args': {'twitter': {'api': ['syndication']}}},
|
||||
'expected_warnings': ['Not all metadata'],
|
||||
'skip': 'Requires authentication',
|
||||
}, {
|
||||
# "stale tweet" with typename "TweetWithVisibilityResults"
|
||||
'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
|
||||
'md5': '62b1e11cdc2cdd0e527f83adb081f536',
|
||||
'md5': '511377ff8dfa7545307084dca4dce319',
|
||||
'info_dict': {
|
||||
'id': '1724883339285544960',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
|
||||
'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
|
||||
'display_id': '1724884212803834154',
|
||||
'channel_id': '337808606',
|
||||
'uploader': 'Robert F. Kennedy Jr',
|
||||
'uploader_id': 'RobertKennedyJr',
|
||||
'uploader_url': 'https://twitter.com/RobertKennedyJr',
|
||||
@@ -1386,6 +1418,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'description': description,
|
||||
'uploader': uploader,
|
||||
'timestamp': unified_timestamp(status.get('created_at')),
|
||||
'channel_id': str_or_none(status.get('user_id_str')) or str_or_none(user.get('id_str')),
|
||||
'uploader_id': uploader_id,
|
||||
'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
|
||||
'like_count': int_or_none(status.get('favorite_count')),
|
||||
|
||||
@@ -10,6 +10,7 @@ from ..utils import (
|
||||
parse_duration,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
variadic,
|
||||
)
|
||||
@@ -83,6 +84,7 @@ class TxxxIE(InfoExtractor):
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://tn.txxx.tube/contents/videos_sources/16574000/16574965/screenshots/1.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://txxx.tube/videos/16574965/digital-desire-malena-morgan/',
|
||||
@@ -98,6 +100,7 @@ class TxxxIE(InfoExtractor):
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://tn.txxx.tube/contents/videos_sources/16574000/16574965/screenshots/1.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://vxxx.com/video-68925/',
|
||||
@@ -113,6 +116,7 @@ class TxxxIE(InfoExtractor):
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://tn.vxxx.com/contents/videos_sources/68000/68925/screenshots/1.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://hclips.com/videos/6291073/malena-morgan-masturbates-her-sweet/',
|
||||
@@ -128,6 +132,7 @@ class TxxxIE(InfoExtractor):
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://hctn.nv7s.com/contents/videos_sources/6291000/6291073/screenshots/1.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://hdzog.com/videos/67063/gorgeous-malena-morgan-will-seduce-you-at-the-first-glance/',
|
||||
@@ -143,6 +148,7 @@ class TxxxIE(InfoExtractor):
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://tn.hdzog.com/contents/videos_sources/67000/67063/screenshots/1.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://hdzog.tube/videos/67063/gorgeous-malena-morgan-will-seduce-you-at-the-first-glance/',
|
||||
@@ -158,6 +164,7 @@ class TxxxIE(InfoExtractor):
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://tn.hdzog.com/contents/videos_sources/67000/67063/screenshots/1.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://hotmovs.com/videos/8789287/unbelievable-malena-morgan-performing-in-incredible-masturantion/',
|
||||
@@ -173,6 +180,7 @@ class TxxxIE(InfoExtractor):
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://tn.hotmovs.com/contents/videos_sources/8789000/8789287/screenshots/10.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://hotmovs.tube/videos/8789287/unbelievable-malena-morgan-performing-in-incredible-masturantion/',
|
||||
@@ -188,6 +196,7 @@ class TxxxIE(InfoExtractor):
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://tn.hotmovs.com/contents/videos_sources/8789000/8789287/screenshots/10.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://inporn.com/video/517897/malena-morgan-solo/',
|
||||
@@ -203,6 +212,7 @@ class TxxxIE(InfoExtractor):
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://iptn.m3pd.com/media/tn/sources/517897_1.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://privatehomeclips.com/videos/3630599/malena-morgan-cam-show/',
|
||||
@@ -218,6 +228,7 @@ class TxxxIE(InfoExtractor):
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://hctn.nv7s.com/contents/videos_sources/3630000/3630599/screenshots/15.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://tubepornclassic.com/videos/1015455/mimi-rogers-full-body-massage-nude-compilation/',
|
||||
@@ -233,6 +244,7 @@ class TxxxIE(InfoExtractor):
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://tn.tubepornclassic.com/contents/videos_sources/1015000/1015455/screenshots/6.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://upornia.com/videos/1498858/twistys-malena-morgan-starring-at-dr-morgan-baller/',
|
||||
@@ -248,6 +260,7 @@ class TxxxIE(InfoExtractor):
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://tn.upornia.com/contents/videos_sources/1498000/1498858/screenshots/1.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://upornia.tube/videos/1498858/twistys-malena-morgan-starring-at-dr-morgan-baller/',
|
||||
@@ -263,6 +276,7 @@ class TxxxIE(InfoExtractor):
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://tn.upornia.com/contents/videos_sources/1498000/1498858/screenshots/1.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://vjav.com/videos/11761/yui-hatano-in-if-yui-was-my-girlfriend2/',
|
||||
@@ -278,6 +292,7 @@ class TxxxIE(InfoExtractor):
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://tn.vjav.com/contents/videos_sources/11000/11761/screenshots/23.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://vjav.tube/videos/11761/yui-hatano-in-if-yui-was-my-girlfriend2/',
|
||||
@@ -293,6 +308,7 @@ class TxxxIE(InfoExtractor):
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://tn.vjav.com/contents/videos_sources/11000/11761/screenshots/23.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://voyeurhit.com/videos/332875/charlotte-stokely-elle-alexandra-malena-morgan-lingerie/',
|
||||
@@ -308,6 +324,7 @@ class TxxxIE(InfoExtractor):
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://tn.voyeurhit.com/contents/videos_sources/332000/332875/screenshots/1.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://voyeurhit.tube/videos/332875/charlotte-stokely-elle-alexandra-malena-morgan-lingerie/',
|
||||
@@ -323,6 +340,7 @@ class TxxxIE(InfoExtractor):
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://tn.voyeurhit.com/contents/videos_sources/332000/332875/screenshots/1.jpg',
|
||||
}
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
@@ -338,6 +356,7 @@ class TxxxIE(InfoExtractor):
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://hctn.nv7s.com/contents/videos_sources/5119000/5119660/screenshots/1.jpg',
|
||||
}
|
||||
}]
|
||||
|
||||
@@ -371,6 +390,7 @@ class TxxxIE(InfoExtractor):
|
||||
'like_count': int_or_none(traverse_obj(video_info, ('video', 'statistics', 'likes'))),
|
||||
'dislike_count': int_or_none(traverse_obj(video_info, ('video', 'statistics', 'dislikes'))),
|
||||
'age_limit': 18,
|
||||
'thumbnail': traverse_obj(video_info, ('video', 'thumbsrc', {url_or_none})),
|
||||
'formats': get_formats(host, video_file),
|
||||
}
|
||||
|
||||
|
||||
@@ -10,7 +10,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class UtreonIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?utreon\.com/v/(?P<id>[\w-]+)'
|
||||
IE_NAME = 'playeur'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:utreon|playeur)\.com/v/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://utreon.com/v/z_I7ikQbuDw',
|
||||
'info_dict': {
|
||||
@@ -19,8 +20,9 @@ class UtreonIE(InfoExtractor):
|
||||
'title': 'Freedom Friday meditation - Rising in the wind',
|
||||
'description': 'md5:a9bf15a42434a062fe313b938343ad1b',
|
||||
'uploader': 'Heather Dawn Elemental Health',
|
||||
'thumbnail': 'https://data-1.utreon.com/v/MG/M2/NT/z_I7ikQbuDw/z_I7ikQbuDw_preview.jpg',
|
||||
'thumbnail': r're:^https?://.+\.jpg',
|
||||
'release_date': '20210723',
|
||||
'duration': 586,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://utreon.com/v/jerJw5EOOVU',
|
||||
@@ -28,10 +30,11 @@ class UtreonIE(InfoExtractor):
|
||||
'id': 'jerJw5EOOVU',
|
||||
'ext': 'mp4',
|
||||
'title': 'When I\'m alone, I love to reflect in peace, to make my dreams come true... [Quotes and Poems]',
|
||||
'description': 'md5:61ee6c2da98be51b04b969ca80273aaa',
|
||||
'description': 'md5:4026aa3a2c10169c3649926ac8ef62b6',
|
||||
'uploader': 'Frases e Poemas Quotes and Poems',
|
||||
'thumbnail': 'https://data-1.utreon.com/v/Mz/Zh/ND/jerJw5EOOVU/jerJw5EOOVU_89af85470a4b16eededde7f8674c96d9_cover.jpg',
|
||||
'thumbnail': r're:^https?://.+\.jpg',
|
||||
'release_date': '20210723',
|
||||
'duration': 60,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://utreon.com/v/C4ZxXhYBBmE',
|
||||
@@ -39,10 +42,11 @@ class UtreonIE(InfoExtractor):
|
||||
'id': 'C4ZxXhYBBmE',
|
||||
'ext': 'mp4',
|
||||
'title': 'Biden’s Capital Gains Tax Rate to Test World’s Highest',
|
||||
'description': 'md5:fb5a6c2e506f013cc76f133f673bc5c8',
|
||||
'description': 'md5:995aa9ad0733c0e5863ebdeff954f40e',
|
||||
'uploader': 'Nomad Capitalist',
|
||||
'thumbnail': 'https://data-1.utreon.com/v/ZD/k1/Mj/C4ZxXhYBBmE/C4ZxXhYBBmE_628342076198c9c06dd6b2c665978584_cover.jpg',
|
||||
'thumbnail': r're:^https?://.+\.jpg',
|
||||
'release_date': '20210723',
|
||||
'duration': 884,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://utreon.com/v/Y-stEH-FBm8',
|
||||
@@ -52,15 +56,28 @@ class UtreonIE(InfoExtractor):
|
||||
'title': 'Creeper-Chan Pranks Steve! 💚 [MINECRAFT ANIME]',
|
||||
'description': 'md5:7a48450b0d761b96dec194be0c5ecb5f',
|
||||
'uploader': 'Merryweather Comics',
|
||||
'thumbnail': 'https://data-1.utreon.com/v/MT/E4/Zj/Y-stEH-FBm8/Y-stEH-FBm8_5290676a41a4a1096db133b09f54f77b_cover.jpg',
|
||||
'thumbnail': r're:^https?://.+\.jpg',
|
||||
'release_date': '20210718',
|
||||
}},
|
||||
]
|
||||
'duration': 151,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://playeur.com/v/Wzqp-UrxSeu',
|
||||
'info_dict': {
|
||||
'id': 'Wzqp-UrxSeu',
|
||||
'ext': 'mp4',
|
||||
'title': 'Update: Clockwork Basilisk Books on the Way!',
|
||||
'description': 'md5:d9756b0b1884c904655b0e170d17cea5',
|
||||
'uploader': 'Forgotten Weapons',
|
||||
'release_date': '20240208',
|
||||
'thumbnail': r're:^https?://.+\.jpg',
|
||||
'duration': 262,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
json_data = self._download_json(
|
||||
'https://api.utreon.com/v1/videos/' + video_id,
|
||||
'https://api.playeur.com/v1/videos/' + video_id,
|
||||
video_id)
|
||||
videos_json = json_data['videos']
|
||||
formats = [{
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
from ..utils import ExtractorError, base_url, int_or_none, url_basename
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class Vbox7IE(InfoExtractor):
|
||||
@@ -19,7 +20,7 @@ class Vbox7IE(InfoExtractor):
|
||||
_GEO_COUNTRIES = ['BG']
|
||||
_TESTS = [{
|
||||
'url': 'http://vbox7.com/play:0946fff23c',
|
||||
'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf',
|
||||
'md5': '50ca1f78345a9c15391af47d8062d074',
|
||||
'info_dict': {
|
||||
'id': '0946fff23c',
|
||||
'ext': 'mp4',
|
||||
@@ -29,19 +30,25 @@ class Vbox7IE(InfoExtractor):
|
||||
'timestamp': 1470982814,
|
||||
'upload_date': '20160812',
|
||||
'uploader': 'zdraveibulgaria',
|
||||
},
|
||||
'params': {
|
||||
'proxy': '127.0.0.1:8118',
|
||||
'view_count': int,
|
||||
'duration': 2640,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://vbox7.com/play:249bb972c2',
|
||||
'md5': '99f65c0c9ef9b682b97313e052734c3f',
|
||||
'md5': 'da1dd2eb245200cb86e6d09d43232116',
|
||||
'info_dict': {
|
||||
'id': '249bb972c2',
|
||||
'ext': 'mp4',
|
||||
'title': 'Смях! Чудо - чист за секунди - Скрита камера',
|
||||
'uploader': 'svideteliat_ot_varshava',
|
||||
'view_count': int,
|
||||
'timestamp': 1360215023,
|
||||
'thumbnail': 'https://i49.vbox7.com/design/iconci/png/noimg6.png',
|
||||
'description': 'Смях! Чудо - чист за секунди - Скрита камера',
|
||||
'upload_date': '20130207',
|
||||
'duration': 83,
|
||||
},
|
||||
'skip': 'georestricted',
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1',
|
||||
'only_matching': True,
|
||||
@@ -53,41 +60,38 @@ class Vbox7IE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
response = self._download_json(
|
||||
'https://www.vbox7.com/ajax/video/nextvideo.php?vid=%s' % video_id,
|
||||
video_id)
|
||||
data = self._download_json(
|
||||
'https://www.vbox7.com/aj/player/item/options', video_id,
|
||||
query={'vid': video_id})['options']
|
||||
|
||||
if 'error' in response:
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, response['error']), expected=True)
|
||||
src_url = data.get('src')
|
||||
if src_url in (None, '', 'blank'):
|
||||
raise ExtractorError('Video is unavailable', expected=True)
|
||||
|
||||
video = response['options']
|
||||
fmt_base = url_basename(src_url).rsplit('.', 1)[0].rsplit('_', 1)[0]
|
||||
if fmt_base == 'vn':
|
||||
self.raise_geo_restricted()
|
||||
|
||||
title = video['title']
|
||||
video_url = video['src']
|
||||
fmt_base = base_url(src_url) + fmt_base
|
||||
|
||||
if '/na.mp4' in video_url:
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||
formats = self._extract_m3u8_formats(
|
||||
f'{fmt_base}.m3u8', video_id, m3u8_id='hls', fatal=False)
|
||||
# TODO: Add MPD formats, when dash range support is added
|
||||
for res in traverse_obj(data, ('resolutions', lambda _, v: v != 0, {int})):
|
||||
formats.append({
|
||||
'url': f'{fmt_base}_{res}.mp4',
|
||||
'format_id': f'http-{res}',
|
||||
'height': res,
|
||||
})
|
||||
|
||||
uploader = video.get('uploader')
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://vbox7.com/play:%s' % video_id, video_id, fatal=None)
|
||||
|
||||
info = {}
|
||||
|
||||
if webpage:
|
||||
info = self._search_json_ld(
|
||||
webpage.replace('"/*@context"', '"@context"'), video_id,
|
||||
fatal=False)
|
||||
|
||||
info.update({
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'uploader': uploader,
|
||||
'thumbnail': self._proto_relative_url(
|
||||
info.get('thumbnail') or self._og_search_thumbnail(webpage),
|
||||
'http:'),
|
||||
})
|
||||
return info
|
||||
'formats': formats,
|
||||
**self._search_json_ld(self._download_webpage(
|
||||
f'https://www.vbox7.com/play:{video_id}', video_id, fatal=False) or '', video_id, fatal=False),
|
||||
**traverse_obj(data, {
|
||||
'title': ('title', {str}),
|
||||
'uploader': ('uploader', {str}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
class ViewLiftBaseIE(InfoExtractor):
|
||||
_API_BASE = 'https://prod-api.viewlift.com/'
|
||||
_DOMAINS_REGEX = r'(?:(?:main\.)?snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|(?:monumental|lax)sportsnetwork|vayafilm|failarmy|ftfnext|lnppass\.legapallacanestro|moviespree|app\.myoutdoortv|neoufitness|pflmma|theidentitytb)\.com|(?:hoichoi|app\.horseandcountry|kronon|marquee|supercrosslive)\.tv'
|
||||
_DOMAINS_REGEX = r'(?:(?:main\.)?snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|(?:monumental|lax)sportsnetwork|vayafilm|failarmy|ftfnext|lnppass\.legapallacanestro|moviespree|app\.myoutdoortv|neoufitness|pflmma|theidentitytb|chorki)\.com|(?:hoichoi|app\.horseandcountry|kronon|marquee|supercrosslive)\.tv'
|
||||
_SITE_MAP = {
|
||||
'ftfnext': 'lax',
|
||||
'funnyforfree': 'snagfilms',
|
||||
@@ -27,6 +27,7 @@ class ViewLiftBaseIE(InfoExtractor):
|
||||
'snagxtreme': 'snagfilms',
|
||||
'theidentitytb': 'tampabay',
|
||||
'vayafilm': 'snagfilms',
|
||||
'chorki': 'prothomalo',
|
||||
}
|
||||
_TOKENS = {}
|
||||
|
||||
@@ -296,6 +297,33 @@ class ViewLiftIE(ViewLiftBaseIE):
|
||||
}, { # Premium movie
|
||||
'url': 'https://www.hoichoi.tv/movies/detective-2020',
|
||||
'only_matching': True
|
||||
}, { # Chorki Premium series
|
||||
'url': 'https://www.chorki.com/bn/series/sinpaat',
|
||||
'playlist_mincount': 7,
|
||||
'info_dict': {
|
||||
'id': 'bn/series/sinpaat',
|
||||
},
|
||||
}, { # Chorki free movie
|
||||
'url': 'https://www.chorki.com/bn/videos/bangla-movie-bikkhov',
|
||||
'info_dict': {
|
||||
'id': '564e755b-f5c7-4515-aee6-8959bee18c93',
|
||||
'title': 'Bikkhov',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20230824',
|
||||
'timestamp': 1692860553,
|
||||
'categories': ['Action Movies', 'Salman Special'],
|
||||
'tags': 'count:14',
|
||||
'thumbnail': 'https://snagfilms-a.akamaihd.net/dd078ff5-b16e-45e4-9723-501b56b9df0a/images/2023/08/24/1692860450729_1920x1080_16x9Images.jpg',
|
||||
'display_id': 'bn/videos/bangla-movie-bikkhov',
|
||||
'description': 'md5:71492b086450625f4374a3eb824f27dc',
|
||||
'duration': 8002,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, { # Chorki Premium movie
|
||||
'url': 'https://www.chorki.com/bn/videos/something-like-an-autobiography',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
|
||||
@@ -48,17 +48,15 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
return url, data, headers
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
webpage = self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Downloading login page')
|
||||
token, vuid = self._extract_xsrft_and_vuid(webpage)
|
||||
viewer = self._download_json('https://vimeo.com/_next/viewer', None, 'Downloading login token')
|
||||
data = {
|
||||
'action': 'login',
|
||||
'email': username,
|
||||
'password': password,
|
||||
'service': 'vimeo',
|
||||
'token': token,
|
||||
'token': viewer['xsrft'],
|
||||
}
|
||||
self._set_vimeo_cookie('vuid', vuid)
|
||||
self._set_vimeo_cookie('vuid', viewer['vuid'])
|
||||
try:
|
||||
self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Logging in',
|
||||
@@ -269,7 +267,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
'https://vimeo.com/_rv/viewer', video_id, note='Downloading jwt token', fatal=False) or {}
|
||||
if not jwt_response.get('jwt'):
|
||||
return
|
||||
headers = {'Authorization': 'jwt %s' % jwt_response['jwt']}
|
||||
headers = {'Authorization': 'jwt %s' % jwt_response['jwt'], 'Accept': 'application/json'}
|
||||
original_response = self._download_json(
|
||||
f'https://api.vimeo.com/videos/{video_id}', video_id,
|
||||
headers=headers, fatal=False, expected_status=(403, 404)) or {}
|
||||
@@ -751,6 +749,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
video = self._download_json(
|
||||
api_url, video_id, headers={
|
||||
'Authorization': 'jwt ' + token,
|
||||
'Accept': 'application/json',
|
||||
}, query={
|
||||
'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays',
|
||||
})
|
||||
@@ -785,7 +784,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
jwt = viewer['jwt']
|
||||
album = self._download_json(
|
||||
'https://api.vimeo.com/albums/' + album_id,
|
||||
album_id, headers={'Authorization': 'jwt ' + jwt},
|
||||
album_id, headers={'Authorization': 'jwt ' + jwt, 'Accept': 'application/json'},
|
||||
query={'fields': 'description,name,privacy'})
|
||||
if try_get(album, lambda x: x['privacy']['view']) == 'password':
|
||||
password = self.get_param('videopassword')
|
||||
@@ -1147,10 +1146,12 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
|
||||
'https://api.vimeo.com/albums/%s/videos' % album_id,
|
||||
album_id, 'Downloading page %d' % api_page, query=query, headers={
|
||||
'Authorization': 'jwt ' + authorization,
|
||||
'Accept': 'application/json',
|
||||
})['data']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 400:
|
||||
return
|
||||
raise
|
||||
for video in videos:
|
||||
link = video.get('link')
|
||||
if not link:
|
||||
@@ -1171,7 +1172,7 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
|
||||
jwt = viewer['jwt']
|
||||
album = self._download_json(
|
||||
'https://api.vimeo.com/albums/' + album_id,
|
||||
album_id, headers={'Authorization': 'jwt ' + jwt},
|
||||
album_id, headers={'Authorization': 'jwt ' + jwt, 'Accept': 'application/json'},
|
||||
query={'fields': 'description,name,privacy'})
|
||||
hashed_pass = None
|
||||
if try_get(album, lambda x: x['privacy']['view']) == 'password':
|
||||
|
||||
@@ -1,159 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class WASDTVBaseIE(InfoExtractor):
    """Shared API access and extraction flow for the wasd.tv extractors.

    Subclasses provide `_get_container` (which media container to use for a
    URL) and `_get_media_url` (which URL inside its metadata to play);
    `_real_extract` builds the info dict from those two answers.
    """

    def _fetch(self, path, video_id, description, query=None):
        """Download `https://wasd.tv/api/{path}` as JSON and return its `result` member.

        `description` is only used in the progress and error notes.
        `query` is an optional dict of query-string parameters.

        Raises ExtractorError (expected) when the response carries an `error` member.
        May return None when the response has no `result` member.
        """
        # `query` defaults to None rather than a shared `{}` literal so that no
        # mutable object is reused between calls.
        response = self._download_json(
            f'https://wasd.tv/api/{path}', video_id, query=query or {},
            note=f'Downloading {description} metadata',
            errnote=f'Unable to download {description} metadata')
        error = response.get('error')
        if error:
            raise ExtractorError(f'{self.IE_NAME} returned error: {error}', expected=True)
        return response.get('result')

    def _extract_thumbnails(self, thumbnails_dict):
        """Build a thumbnails list from the `small`/`medium`/`large` members of a dict.

        Each entry's `preference` is its index within the values the lookup
        returned; falsy URLs are dropped.
        """
        return [{
            'url': url,
            'preference': index,
        } for index, url in enumerate(
            traverse_obj(thumbnails_dict, (('small', 'medium', 'large'),))) if url]

    def _real_extract(self, url):
        """Return the info dict for the first media entry of the first stream in the container."""
        container = self._get_container(url)
        stream = traverse_obj(container, ('media_container_streams', 0))
        media = try_get(stream, lambda x: x['stream_media'][0])
        if not media:
            raise ExtractorError('Can not extract media data.', expected=True)
        media_meta = media.get('media_meta')
        if not media_meta:
            # Without the metadata there is no media URL to extract; fail with a
            # regular extractor error instead of crashing on None below.
            raise ExtractorError('Can not extract media data.', expected=True)
        media_url, is_live = self._get_media_url(media_meta)
        video_id = media.get('media_id') or container.get('media_container_id')
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(media_url, video_id, 'mp4')
        return {
            'id': str(video_id),
            # The page is only downloaded when the API gives no container name
            'title': container.get('media_container_name') or self._og_search_title(self._download_webpage(url, video_id)),
            'description': container.get('media_container_description'),
            'thumbnails': self._extract_thumbnails(media_meta.get('media_preview_images')),
            'timestamp': parse_iso8601(container.get('created_at')),
            # Live streams report current viewers, everything else total viewers
            'view_count': int_or_none(stream.get('stream_current_viewers' if is_live else 'stream_total_viewers')),
            'is_live': is_live,
            'formats': formats,
            'subtitles': subtitles,
        }

    def _get_container(self, url):
        """Return the media container dict for `url`; must be implemented by subclasses."""
        raise NotImplementedError('Subclass for get media container')

    def _get_media_url(self, media_meta):
        """Return a `(media_url, is_live)` tuple for `media_meta`; must be implemented by subclasses."""
        raise NotImplementedError('Subclass for get media url')
|
||||
|
||||
|
||||
class WASDTVStreamIE(WASDTVBaseIE):
    """Extractor for a wasd.tv channel page, i.e. the channel's running stream."""

    IE_NAME = 'wasdtv:stream'
    _VALID_URL = r'https?://wasd\.tv/(?P<id>[^/#?]+)$'
    _TESTS = [{
        'url': 'https://wasd.tv/24_7',
        'info_dict': {
            'id': '559738',
            'ext': 'mp4',
            'title': 'Live 24/7 Music',
            'description': '24/7 Music',
            'timestamp': int,
            'upload_date': r're:^\d{8}$',
            'is_live': True,
            'view_count': int,
        },
    }]

    def _get_container(self, url):
        """Resolve the channel nickname in `url` and return its first running container.

        Raises ExtractorError (expected) when the API lists no running container.
        """
        nickname = self._match_id(url)
        channel_id = self._fetch(
            f'channels/nicknames/{nickname}', video_id=nickname, description='channel').get('channel_id')

        # Ask only for single containers that are currently running
        running_filter = {
            'channel_id': channel_id,
            'media_container_type': 'SINGLE',
            'media_container_status': 'RUNNING',
        }
        containers = self._fetch(
            'v2/media-containers', channel_id, 'running media containers', query=running_filter)
        if containers:
            return containers[0]
        raise ExtractorError(f'{nickname} is offline', expected=True)

    def _get_media_url(self, media_meta):
        """Return the `media_url` entry, always flagged as live."""
        is_live = True
        return media_meta['media_url'], is_live
|
||||
|
||||
|
||||
class WASDTVRecordIE(WASDTVBaseIE):
    """Extractor for wasd.tv URLs that carry a `record=<id>` query."""

    IE_NAME = 'wasdtv:record'
    _VALID_URL = r'https?://wasd\.tv/[^/#?]+(?:/videos)?\?record=(?P<id>\d+)$'
    _TESTS = [{
        'url': 'https://wasd.tv/spacemita/videos?record=907755',
        'md5': 'c9899dd85be4cc997816ff9f9ca516ce',
        'info_dict': {
            'id': '906825',
            'ext': 'mp4',
            'title': 'Музыкальный',
            'description': 'md5:f510388d929ff60ae61d4c3cab3137cc',
            'timestamp': 1645812079,
            'upload_date': '20220225',
            'thumbnail': r're:^https?://.+\.jpg',
            'is_live': False,
            'view_count': int,
        },
    }, {
        'url': 'https://wasd.tv/spacemita?record=907755',
        'only_matching': True,
    }]

    def _get_container(self, url):
        """Fetch the media container whose id is the `record` value in `url`."""
        record_id = self._match_id(url)
        api_path = f'v2/media-containers/{record_id}'
        return self._fetch(api_path, record_id, 'media container')

    def _get_media_url(self, media_meta):
        """Prefer the archive URL (not live); otherwise fall back to `media_url` (live)."""
        archive_url = media_meta.get('media_archive_url')
        if not archive_url:
            return media_meta['media_url'], True
        return archive_url, False
|
||||
|
||||
|
||||
class WASDTVClipIE(WASDTVBaseIE):
    """Extractor for wasd.tv clip URLs (`.../clips?clip=<id>`)."""

    IE_NAME = 'wasdtv:clip'
    _VALID_URL = r'https?://wasd\.tv/[^/#?]+/clips\?clip=(?P<id>\d+)$'
    _TESTS = [{
        'url': 'https://wasd.tv/spacemita/clips?clip=26804',
        'md5': '818885e720143d7a4e776ff66fcff148',
        'info_dict': {
            'id': '26804',
            'ext': 'mp4',
            'title': 'Пуш флексит на голове стримера',
            'timestamp': 1646682908,
            'upload_date': '20220307',
            'thumbnail': r're:^https?://.+\.jpg',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for the clip identified in `url`.

        Raises ExtractorError (expected) when the API response carries no clip URL.
        """
        clip_id = self._match_id(url)
        # `_fetch` returns None when the response has no `result`, and
        # `clip_data` may be absent too; normalise both to empty dicts so the
        # lookups below cannot fail on None.
        clip = self._fetch(f'v2/clips/{clip_id}', video_id=clip_id, description='clip') or {}
        clip_data = clip.get('clip_data') or {}
        clip_url = clip_data.get('url')
        if not clip_url:
            raise ExtractorError('Can not extract clip data.', expected=True)
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(clip_url, video_id=clip_id, ext='mp4')

        title = clip.get('clip_title')
        if not title:
            # The page download is non-fatal and yields a falsy value on
            # failure; only search it for a title when it was actually fetched.
            webpage = self._download_webpage(url, clip_id, fatal=False)
            title = self._og_search_title(webpage) if webpage else None

        return {
            'id': clip_id,
            'title': title,
            'thumbnails': self._extract_thumbnails(clip_data.get('preview')),
            'timestamp': parse_iso8601(clip.get('created_at')),
            'view_count': int_or_none(clip.get('clip_views_count')),
            'formats': formats,
            'subtitles': subtitles,
        }
|
||||
@@ -2068,7 +2068,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'title': 'Voyeur Girl',
|
||||
'description': 'md5:7ae382a65843d6df2685993e90a8628f',
|
||||
'upload_date': '20190312',
|
||||
'artist': 'Stephen',
|
||||
'artists': ['Stephen'],
|
||||
'creators': ['Stephen'],
|
||||
'track': 'Voyeur Girl',
|
||||
'album': 'it\'s too much love to know my dear',
|
||||
'release_date': '20190313',
|
||||
@@ -2081,7 +2082,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'channel': 'Stephen', # TODO: should be "Stephen - Topic"
|
||||
'uploader': 'Stephen',
|
||||
'availability': 'public',
|
||||
'creator': 'Stephen',
|
||||
'duration': 169,
|
||||
'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
|
||||
'age_limit': 0,
|
||||
@@ -3669,15 +3669,28 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
return orderedSet(requested_clients)
|
||||
|
||||
def _invalid_player_response(self, pr, video_id):
    """Return the video id found in `pr` when it differs from `video_id`, else None.

    YouTube may return a different video player response than expected.
    See: https://github.com/TeamNewPipe/NewPipe/issues/8713
    """
    response_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
    if response_video_id == video_id:
        return None
    return response_video_id
|
||||
|
||||
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
|
||||
initial_pr = None
|
||||
if webpage:
|
||||
initial_pr = self._search_json(
|
||||
self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)
|
||||
|
||||
prs = []
|
||||
if initial_pr and not self._invalid_player_response(initial_pr, video_id):
|
||||
# Android player_response does not have microFormats which are needed for
|
||||
# extraction of some data. So we return the initial_pr with formats
|
||||
# stripped out even if not requested by the user
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/501
|
||||
prs.append({**initial_pr, 'streamingData': None})
|
||||
|
||||
all_clients = set(clients)
|
||||
clients = clients[::-1]
|
||||
prs = []
|
||||
|
||||
def append_client(*client_names):
|
||||
""" Append the first client name that exists but not already used """
|
||||
@@ -3689,18 +3702,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
all_clients.add(actual_client)
|
||||
return
|
||||
|
||||
# Android player_response does not have microFormats which are needed for
|
||||
# extraction of some data. So we return the initial_pr with formats
|
||||
# stripped out even if not requested by the user
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/501
|
||||
if initial_pr:
|
||||
pr = dict(initial_pr)
|
||||
pr['streamingData'] = None
|
||||
prs.append(pr)
|
||||
|
||||
last_error = None
|
||||
tried_iframe_fallback = False
|
||||
player_url = None
|
||||
skipped_clients = {}
|
||||
while clients:
|
||||
client, base_client, variant = _split_innertube_client(clients.pop())
|
||||
player_ytcfg = master_ytcfg if client == 'web' else {}
|
||||
@@ -3721,26 +3725,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
|
||||
client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
|
||||
except ExtractorError as e:
|
||||
if last_error:
|
||||
self.report_warning(last_error)
|
||||
last_error = e
|
||||
self.report_warning(e)
|
||||
continue
|
||||
|
||||
if pr:
|
||||
# YouTube may return a different video player response than expected.
|
||||
# See: https://github.com/TeamNewPipe/NewPipe/issues/8713
|
||||
pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
|
||||
if pr_video_id and pr_video_id != video_id:
|
||||
self.report_warning(
|
||||
f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
|
||||
else:
|
||||
# Save client name for introspection later
|
||||
name = short_client_name(client)
|
||||
sd = traverse_obj(pr, ('streamingData', {dict})) or {}
|
||||
sd[STREAMING_DATA_CLIENT_NAME] = name
|
||||
for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
|
||||
f[STREAMING_DATA_CLIENT_NAME] = name
|
||||
prs.append(pr)
|
||||
if pr_id := self._invalid_player_response(pr, video_id):
|
||||
skipped_clients[client] = pr_id
|
||||
elif pr:
|
||||
# Save client name for introspection later
|
||||
name = short_client_name(client)
|
||||
sd = traverse_obj(pr, ('streamingData', {dict})) or {}
|
||||
sd[STREAMING_DATA_CLIENT_NAME] = name
|
||||
for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
|
||||
f[STREAMING_DATA_CLIENT_NAME] = name
|
||||
prs.append(pr)
|
||||
|
||||
# creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
|
||||
if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
|
||||
@@ -3751,10 +3748,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
elif not variant:
|
||||
append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')
|
||||
|
||||
if last_error:
|
||||
if not len(prs):
|
||||
raise last_error
|
||||
self.report_warning(last_error)
|
||||
if skipped_clients:
|
||||
self.report_warning(
|
||||
f'Skipping player responses from {"/".join(skipped_clients)} clients '
|
||||
f'(got player responses for video "{"/".join(set(skipped_clients.values()))}" instead of "{video_id}")')
|
||||
if not prs:
|
||||
raise ExtractorError(
|
||||
'All player responses are invalid. Your IP is likely being blocked by Youtube', expected=True)
|
||||
elif not prs:
|
||||
raise ExtractorError('Failed to extract any player response')
|
||||
return prs, player_url
|
||||
|
||||
def _needs_live_processing(self, live_status, duration):
|
||||
@@ -4418,7 +4420,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
release_year = release_date[:4]
|
||||
info.update({
|
||||
'album': mobj.group('album'.strip()),
|
||||
'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
|
||||
'artists': ([a] if (a := mobj.group('clean_artist'))
|
||||
else [a.strip() for a in mobj.group('artist').split('·')]),
|
||||
'track': mobj.group('track').strip(),
|
||||
'release_date': release_date,
|
||||
'release_year': int_or_none(release_year),
|
||||
@@ -4564,7 +4567,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if mrr_title == 'Album':
|
||||
info['album'] = mrr_contents_text
|
||||
elif mrr_title == 'Artist':
|
||||
info['artist'] = mrr_contents_text
|
||||
info['artists'] = [mrr_contents_text] if mrr_contents_text else None
|
||||
elif mrr_title == 'Song':
|
||||
info['track'] = mrr_contents_text
|
||||
owner_badges = self._extract_badges(traverse_obj(vsir, ('owner', 'videoOwnerRenderer', 'badges')))
|
||||
@@ -4598,7 +4601,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if fmt.get('protocol') == 'm3u8_native':
|
||||
fmt['__needs_testing'] = True
|
||||
|
||||
for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
|
||||
for s_k, d_k in [('artists', 'creators'), ('track', 'alt_title')]:
|
||||
v = info.get(s_k)
|
||||
if v:
|
||||
info[d_k] = v
|
||||
|
||||
71
yt_dlp/extractor/zetland.py
Normal file
71
yt_dlp/extractor/zetland.py
Normal file
@@ -0,0 +1,71 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import merge_dicts, unified_timestamp, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ZetlandDKArticleIE(InfoExtractor):
    """Extractor for the audio files attached to zetland.dk articles."""

    _VALID_URL = r'https?://www\.zetland\.dk/\w+/(?P<id>(?P<story_id>\w{8})-(?P<uploader_id>\w{8})-(?:\w{5}))'
    _TESTS = [{
        'url': 'https://www.zetland.dk/historie/sO9aq2MY-a81VP3BY-66e69?utm_source=instagram&utm_medium=linkibio&utm_campaign=artikel',
        'info_dict': {
            'id': 'sO9aq2MY-a81VP3BY-66e69',
            'ext': 'mp3',
            'modified_date': '20240118',
            'title': 'Afsnit 1: “Det føltes som en kidnapning.” ',
            'upload_date': '20240116',
            'uploader_id': 'a81VP3BY',
            'modified_timestamp': 1705568739,
            'release_timestamp': 1705377592,
            'uploader_url': 'https://www.zetland.dk/skribent/a81VP3BY',
            'uploader': 'Helle Fuusager',
            'release_date': '20240116',
            'thumbnail': r're:https://zetland\.imgix\.net/2aafe500-b14e-11ee-bf83-65d5e1283a57/Zetland_Image_1\.jpg',
            'description': 'md5:9619d426772c133f5abb26db27f26a01',
            'timestamp': 1705377592,
            'series_id': '62d54630-e87b-4ab1-a255-8de58dbe1b14',
        }

    }]

    def _real_extract(self, url):
        """Build the info dict from the page's Next.js data, meta tags and JSON-LD.

        The metadata sources are handed to `merge_dicts` in this order: the
        fixed fields, the story data, the page's `metaInfo`, the HTML meta
        tags, and finally the JSON-LD.
        """
        mobj = self._match_valid_url(url)
        display_id, uploader_id = mobj.group('id', 'uploader_id')
        webpage = self._download_webpage(url, display_id)

        page_props = self._search_nextjs_data(webpage, display_id)['props']['pageProps']
        story = traverse_obj(page_props, ('initialState', 'consume', 'story', 'story'))

        # Every listed audio file becomes an audio-only format
        audio_urls = traverse_obj(story, ('story_content', 'meta', 'audioFiles', ..., {url_or_none}))
        formats = [{
            'url': audio_url,
            'vcodec': 'none',
        } for audio_url in audio_urls]

        fixed_fields = {
            'id': display_id,
            'formats': formats,
            'uploader_id': uploader_id
        }
        story_fields = traverse_obj(story, {
            'title': ((('story_content', 'content', 'title'), 'title'), {str}),
            'uploader': ('sharer', 'name'),
            'uploader_id': ('sharer', 'sharer_id'),
            'description': ('story_content', 'content', 'socialDescription'),
            'series_id': ('story_content', 'meta', 'seriesId'),
            'release_timestamp': ('published_at', {unified_timestamp}),
            'modified_timestamp': ('revised_at', {unified_timestamp}),
        }, get_all=False)
        meta_info_fields = traverse_obj(page_props, ('metaInfo', {
            'title': ((('meta', 'title'), ('ld', 'headline'), ('og', 'og:title'), ('og', 'twitter:title')), {str}),
            'description': ((('meta', 'description'), ('ld', 'description'), ('og', 'og:description'), ('og', 'twitter:description')), {str}),
            'uploader': ((('meta', 'author'), ('ld', 'author', 'name')), {str}),
            'uploader_url': ('ld', 'author', 'url', {url_or_none}),
            'thumbnail': ((('ld', 'image'), ('og', 'og:image'), ('og', 'twitter:image')), {url_or_none}),
            'modified_timestamp': ('ld', 'dateModified', {unified_timestamp}),
            'release_timestamp': ('ld', 'datePublished', {unified_timestamp}),
            'timestamp': ('ld', 'dateCreated', {unified_timestamp}),
        }), get_all=False)
        html_meta_fields = {
            'title': self._html_search_meta(['title', 'og:title', 'twitter:title'], webpage),
            'description': self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage),
            'thumbnail': self._html_search_meta(['og:image', 'twitter:image'], webpage),
            'uploader': self._html_search_meta(['author'], webpage),
            'release_timestamp': unified_timestamp(self._html_search_meta(['article:published_time'], webpage)),
        }
        json_ld_fields = self._search_json_ld(webpage, display_id, fatal=False)

        return merge_dicts(
            fixed_fields, story_fields, meta_info_fields, html_meta_fields, json_ld_fields)
|
||||
Reference in New Issue
Block a user