mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-02-05 13:37:03 +00:00
Merge remote-tracking branch 'upstream/master' into boomplay
This commit is contained in:
@@ -278,6 +278,7 @@ from .bleacherreport import (
|
||||
from .blerp import BlerpIE
|
||||
from .blogger import BloggerIE
|
||||
from .bloomberg import BloombergIE
|
||||
from .bluesky import BlueskyIE
|
||||
from .bokecc import BokeCCIE
|
||||
from .bongacams import BongaCamsIE
|
||||
from .boomplay import (
|
||||
@@ -411,8 +412,6 @@ from .cmt import CMTIE
|
||||
from .cnbc import CNBCVideoIE
|
||||
from .cnn import (
|
||||
CNNIE,
|
||||
CNNArticleIE,
|
||||
CNNBlogsIE,
|
||||
CNNIndonesiaIE,
|
||||
)
|
||||
from .comedycentral import (
|
||||
|
||||
@@ -154,7 +154,7 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
'title': ('title', {str}),
|
||||
'uploader': ('writer_nick', {str}),
|
||||
'uploader_id': ('bj_id', {str}),
|
||||
'duration': ('total_file_duration', {functools.partial(int_or_none, scale=1000)}),
|
||||
'duration': ('total_file_duration', {int_or_none(scale=1000)}),
|
||||
'thumbnail': ('thumb', {url_or_none}),
|
||||
})
|
||||
|
||||
@@ -178,7 +178,7 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
'title': f'{common_info.get("title") or "Untitled"} (part {file_num})',
|
||||
'formats': formats,
|
||||
**traverse_obj(file_element, {
|
||||
'duration': ('duration', {functools.partial(int_or_none, scale=1000)}),
|
||||
'duration': ('duration', {int_or_none(scale=1000)}),
|
||||
'timestamp': ('file_start', {unified_timestamp}),
|
||||
}),
|
||||
})
|
||||
@@ -234,7 +234,7 @@ class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
|
||||
'catch_list', lambda _, v: v['files'][0]['file'], {
|
||||
'id': ('files', 0, 'file_info_key', {str}),
|
||||
'url': ('files', 0, 'file', {url_or_none}),
|
||||
'duration': ('files', 0, 'duration', {functools.partial(int_or_none, scale=1000)}),
|
||||
'duration': ('files', 0, 'duration', {int_or_none(scale=1000)}),
|
||||
'title': ('title', {str}),
|
||||
'uploader': ('writer_nick', {str}),
|
||||
'uploader_id': ('writer_id', {str}),
|
||||
|
||||
@@ -71,7 +71,7 @@ class AllstarBaseIE(InfoExtractor):
|
||||
'thumbnails': (('clipImageThumb', 'clipImageSource'), {'url': {media_url_or_none}}),
|
||||
'duration': ('clipLength', {int_or_none}),
|
||||
'filesize': ('clipSizeBytes', {int_or_none}),
|
||||
'timestamp': ('createdDate', {functools.partial(int_or_none, scale=1000)}),
|
||||
'timestamp': ('createdDate', {int_or_none(scale=1000)}),
|
||||
'uploader': ('username', {str}),
|
||||
'uploader_id': ('user', '_id', {str}),
|
||||
'view_count': ('views', {int_or_none}),
|
||||
|
||||
@@ -33,24 +33,6 @@ class AnvatoIE(InfoExtractor):
|
||||
_AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' # from anvplayer.min.js
|
||||
|
||||
_TESTS = [{
|
||||
# from https://www.nfl.com/videos/baker-mayfield-s-game-changing-plays-from-3-td-game-week-14
|
||||
'url': 'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:899441',
|
||||
'md5': '921919dab3cd0b849ff3d624831ae3e2',
|
||||
'info_dict': {
|
||||
'id': '899441',
|
||||
'ext': 'mp4',
|
||||
'title': 'Baker Mayfield\'s game-changing plays from 3-TD game Week 14',
|
||||
'description': 'md5:85e05a3cc163f8c344340f220521136d',
|
||||
'upload_date': '20201215',
|
||||
'timestamp': 1608009755,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'uploader': 'NFL',
|
||||
'tags': ['Baltimore Ravens at Cleveland Browns (2020-REG-14)', 'Baker Mayfield', 'Game Highlights',
|
||||
'Player Highlights', 'Cleveland Browns', 'league'],
|
||||
'duration': 157,
|
||||
'categories': ['Entertainment', 'Game', 'Highlights'],
|
||||
},
|
||||
}, {
|
||||
# from https://ktla.com/news/99-year-old-woman-learns-to-fly-in-torrance-checks-off-bucket-list-dream/
|
||||
'url': 'anvato:X8POa4zpGZMmeiq0wqiO8IP5rMqQM9VN:8032455',
|
||||
'md5': '837718bcfb3a7778d022f857f7a9b19e',
|
||||
@@ -241,31 +223,6 @@ class AnvatoIE(InfoExtractor):
|
||||
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582',
|
||||
}
|
||||
|
||||
def _generate_nfl_token(self, anvack, mcp_id):
|
||||
reroute = self._download_json(
|
||||
'https://api.nfl.com/v1/reroute', mcp_id, data=b'grant_type=client_credentials',
|
||||
headers={'X-Domain-Id': 100}, note='Fetching token info')
|
||||
token_type = reroute.get('token_type') or 'Bearer'
|
||||
auth_token = f'{token_type} {reroute["access_token"]}'
|
||||
response = self._download_json(
|
||||
'https://api.nfl.com/v3/shield/', mcp_id, data=json.dumps({
|
||||
'query': '''{
|
||||
viewer {
|
||||
mediaToken(anvack: "%s", id: %s) {
|
||||
token
|
||||
}
|
||||
}
|
||||
}''' % (anvack, mcp_id), # noqa: UP031
|
||||
}).encode(), headers={
|
||||
'Authorization': auth_token,
|
||||
'Content-Type': 'application/json',
|
||||
}, note='Fetching NFL API token')
|
||||
return traverse_obj(response, ('data', 'viewer', 'mediaToken', 'token'))
|
||||
|
||||
_TOKEN_GENERATORS = {
|
||||
'GXvEgwyJeWem8KCYXfeoHWknwP48Mboj': _generate_nfl_token,
|
||||
}
|
||||
|
||||
def _server_time(self, access_key, video_id):
|
||||
return int_or_none(traverse_obj(self._download_json(
|
||||
f'{self._API_BASE_URL}/server_time', video_id, query={'anvack': access_key},
|
||||
@@ -290,8 +247,6 @@ class AnvatoIE(InfoExtractor):
|
||||
}
|
||||
if extracted_token is not None:
|
||||
api['anvstk2'] = extracted_token
|
||||
elif self._TOKEN_GENERATORS.get(access_key) is not None:
|
||||
api['anvstk2'] = self._TOKEN_GENERATORS[access_key](self, access_key, video_id)
|
||||
elif self._ANVACK_TABLE.get(access_key) is not None:
|
||||
api['anvstk'] = md5_text(f'{access_key}|{anvrid}|{server_time}|{self._ANVACK_TABLE[access_key]}')
|
||||
else:
|
||||
|
||||
@@ -299,7 +299,7 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '94834686',
|
||||
'ext': 'mp4',
|
||||
'duration': 2700,
|
||||
'duration': 2670,
|
||||
'episode': '7 Tage ... unter harten Jungs',
|
||||
'description': 'md5:0f215470dcd2b02f59f4bd10c963f072',
|
||||
'upload_date': '20231005',
|
||||
@@ -307,10 +307,28 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
'display_id': 'N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3',
|
||||
'series': '7 Tage ...',
|
||||
'channel': 'HR',
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:f6e6d5ffac41925c?w=960&ch=fa32ba69bc87989a',
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:430c86d233afa42d?w=960&ch=fa32ba69bc87989a',
|
||||
'title': '7 Tage ... unter harten Jungs',
|
||||
'_old_archive_ids': ['ardbetamediathek N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/video/lokalzeit-aus-duesseldorf/lokalzeit-aus-duesseldorf-oder-31-10-2024/wdr-duesseldorf/Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz',
|
||||
'info_dict': {
|
||||
'id': '13847165',
|
||||
'chapters': 'count:8',
|
||||
'ext': 'mp4',
|
||||
'channel': 'WDR',
|
||||
'display_id': 'Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz',
|
||||
'episode': 'Lokalzeit aus Düsseldorf | 31.10.2024',
|
||||
'series': 'Lokalzeit aus Düsseldorf',
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:f02ec9bd9b7bd5f6?w=960&ch=612491dcd5e09b0c',
|
||||
'title': 'Lokalzeit aus Düsseldorf | 31.10.2024',
|
||||
'upload_date': '20241031',
|
||||
'timestamp': 1730399400,
|
||||
'description': 'md5:12db30b3b706314efe3778b8df1a7058',
|
||||
'duration': 1759,
|
||||
'_old_archive_ids': ['ardbetamediathek Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
||||
'only_matching': True,
|
||||
@@ -455,6 +473,12 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
'subtitles': subtitles,
|
||||
'is_live': is_live,
|
||||
'age_limit': age_limit,
|
||||
**traverse_obj(media_data, {
|
||||
'chapters': ('pluginData', 'jumpmarks@all', 'chapterArray', lambda _, v: int_or_none(v['chapterTime']), {
|
||||
'start_time': ('chapterTime', {int_or_none}),
|
||||
'title': ('chapterTitle', {str}),
|
||||
}),
|
||||
}),
|
||||
**traverse_obj(media_data, ('meta', {
|
||||
'title': 'title',
|
||||
'description': 'synopsis',
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import functools
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
@@ -10,7 +9,6 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
get_element_html_by_id,
|
||||
int_or_none,
|
||||
parse_filesize,
|
||||
str_or_none,
|
||||
@@ -21,7 +19,7 @@ from ..utils import (
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class BandcampIE(InfoExtractor):
|
||||
@@ -45,6 +43,8 @@ class BandcampIE(InfoExtractor):
|
||||
'uploader_url': 'https://youtube-dl.bandcamp.com',
|
||||
'uploader_id': 'youtube-dl',
|
||||
'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg',
|
||||
'artists': ['youtube-dl "\'/\\ä↭'],
|
||||
'album_artists': ['youtube-dl "\'/\\ä↭'],
|
||||
},
|
||||
'skip': 'There is a limit of 200 free downloads / month for the test song',
|
||||
}, {
|
||||
@@ -271,6 +271,18 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
'timestamp': 1311756226,
|
||||
'upload_date': '20110727',
|
||||
'uploader': 'Blazo',
|
||||
'thumbnail': 'https://f4.bcbits.com/img/a1721150828_5.jpg',
|
||||
'album_artists': ['Blazo'],
|
||||
'uploader_url': 'https://blazo.bandcamp.com',
|
||||
'release_date': '20110727',
|
||||
'release_timestamp': 1311724800.0,
|
||||
'track': 'Intro',
|
||||
'uploader_id': 'blazo',
|
||||
'track_number': 1,
|
||||
'album': 'Jazz Format Mixtape vol.1',
|
||||
'artists': ['Blazo'],
|
||||
'duration': 19.335,
|
||||
'track_id': '1353101989',
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -282,6 +294,18 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
'timestamp': 1311757238,
|
||||
'upload_date': '20110727',
|
||||
'uploader': 'Blazo',
|
||||
'track': 'Kero One - Keep It Alive (Blazo remix)',
|
||||
'release_date': '20110727',
|
||||
'track_id': '38097443',
|
||||
'track_number': 2,
|
||||
'duration': 181.467,
|
||||
'uploader_url': 'https://blazo.bandcamp.com',
|
||||
'album': 'Jazz Format Mixtape vol.1',
|
||||
'uploader_id': 'blazo',
|
||||
'album_artists': ['Blazo'],
|
||||
'artists': ['Blazo'],
|
||||
'thumbnail': 'https://f4.bcbits.com/img/a1721150828_5.jpg',
|
||||
'release_timestamp': 1311724800.0,
|
||||
},
|
||||
},
|
||||
],
|
||||
@@ -289,6 +313,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
'title': 'Jazz Format Mixtape vol.1',
|
||||
'id': 'jazz-format-mixtape-vol-1',
|
||||
'uploader_id': 'blazo',
|
||||
'description': 'md5:38052a93217f3ffdc033cd5dbbce2989',
|
||||
},
|
||||
'params': {
|
||||
'playlistend': 2,
|
||||
@@ -363,10 +388,10 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
_VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://bandcamp.com/?show=224',
|
||||
'md5': 'b00df799c733cf7e0c567ed187dea0fd',
|
||||
'md5': '61acc9a002bed93986b91168aa3ab433',
|
||||
'info_dict': {
|
||||
'id': '224',
|
||||
'ext': 'opus',
|
||||
'ext': 'mp3',
|
||||
'title': 'BC Weekly April 4th 2017 - Magic Moments',
|
||||
'description': 'md5:5d48150916e8e02d030623a48512c874',
|
||||
'duration': 5829.77,
|
||||
@@ -376,7 +401,7 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
'episode_id': '224',
|
||||
},
|
||||
'params': {
|
||||
'format': 'opus-lo',
|
||||
'format': 'mp3-128',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bandcamp.com/?blah/blah@&show=228',
|
||||
@@ -484,7 +509,7 @@ class BandcampUserIE(InfoExtractor):
|
||||
or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
|
||||
|
||||
yield from traverse_obj(webpage, (
|
||||
{functools.partial(get_element_html_by_id, 'music-grid')}, {extract_attributes},
|
||||
{find_element(id='music-grid', html=True)}, {extract_attributes},
|
||||
'data-client-items', {json.loads}, ..., 'page_url', {str}))
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -493,4 +518,4 @@ class BandcampUserIE(InfoExtractor):
|
||||
|
||||
return self.playlist_from_matches(
|
||||
self._yield_items(webpage), uploader, f'Discography of {uploader}',
|
||||
getter=functools.partial(urljoin, url))
|
||||
getter=urljoin(url))
|
||||
|
||||
@@ -1284,9 +1284,9 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
**traverse_obj(model, {
|
||||
'title': ('title', {str}),
|
||||
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
|
||||
'description': ('synopses', ('long', 'medium', 'short'), {str}, {lambda x: x or None}, any),
|
||||
'description': ('synopses', ('long', 'medium', 'short'), {str}, filter, any),
|
||||
'duration': ('versions', 0, 'duration', {int}),
|
||||
'timestamp': ('versions', 0, 'availableFrom', {functools.partial(int_or_none, scale=1000)}),
|
||||
'timestamp': ('versions', 0, 'availableFrom', {int_or_none(scale=1000)}),
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -1386,7 +1386,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
formats = traverse_obj(media_data, ('playlist', lambda _, v: url_or_none(v['url']), {
|
||||
'url': ('url', {url_or_none}),
|
||||
'ext': ('format', {str}),
|
||||
'tbr': ('bitrate', {functools.partial(int_or_none, scale=1000)}),
|
||||
'tbr': ('bitrate', {int_or_none(scale=1000)}),
|
||||
}))
|
||||
if formats:
|
||||
entry = {
|
||||
@@ -1398,7 +1398,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
'title': ('title', {str}),
|
||||
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
|
||||
'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
|
||||
'timestamp': ('firstPublished', {functools.partial(int_or_none, scale=1000)}),
|
||||
'timestamp': ('firstPublished', {int_or_none(scale=1000)}),
|
||||
}),
|
||||
}
|
||||
done = True
|
||||
@@ -1428,7 +1428,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
if not entry.get('timestamp'):
|
||||
entry['timestamp'] = traverse_obj(next_data, (
|
||||
..., 'contents', is_type('timestamp'), 'model',
|
||||
'timestamp', {functools.partial(int_or_none, scale=1000)}, any))
|
||||
'timestamp', {int_or_none(scale=1000)}, any))
|
||||
entries.append(entry)
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
@@ -1,18 +1,33 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import extract_attributes
|
||||
from ..utils import ExtractorError, extract_attributes
|
||||
|
||||
|
||||
class BFMTVBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.|rmc\.)?bfmtv\.com/'
|
||||
_VALID_URL_TMPL = _VALID_URL_BASE + r'(?:[^/]+/)*[^/?&#]+_%s[A-Z]-(?P<id>\d{12})\.html'
|
||||
_VIDEO_BLOCK_REGEX = r'(<div[^>]+class="video_block[^"]*"[^>]*>)'
|
||||
_VIDEO_BLOCK_REGEX = r'(<div[^>]+class="video_block[^"]*"[^>]*>.*?</div>)'
|
||||
_VIDEO_ELEMENT_REGEX = r'(<video-js[^>]+>)'
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
|
||||
|
||||
def _brightcove_url_result(self, video_id, video_block):
|
||||
account_id = video_block.get('accountid') or '876450612001'
|
||||
player_id = video_block.get('playerid') or 'I2qBTln4u'
|
||||
def _extract_video(self, video_block):
    """Turn the HTML of one video block into a ``BrightcoveNew`` url_result.

    A tag matching ``_VIDEO_ELEMENT_REGEX`` inside ``video_block`` is preferred
    as the attribute source; when none is found, the attributes are parsed
    from ``video_block`` itself. Each source uses its own attribute names and
    its own fallback account/player IDs.

    Returns None when the chosen source carries no video ID.
    """
    video_element = self._search_regex(
        self._VIDEO_ELEMENT_REGEX, video_block, 'video element', default=None)

    # Pick the HTML to parse together with the attribute names/fallbacks that
    # belong to it, so the lookup logic below is written only once
    if video_element:
        source_html = video_element
        id_attr = 'data-video-id'
        account_attr, fallback_account = 'data-account', '876450610001'
        player_attr, fallback_player = 'adjustplayer', '19dszYXgm'
    else:
        source_html = video_block
        id_attr = 'videoid'
        account_attr, fallback_account = 'accountid', '876630703001'
        player_attr, fallback_player = 'playerid', 'KbPwEbuHx'

    attrs = extract_attributes(source_html)
    video_id = attrs.get(id_attr)
    if not video_id:
        return None

    brightcove_url = self.BRIGHTCOVE_URL_TEMPLATE % (
        attrs.get(account_attr) or fallback_account,
        attrs.get(player_attr) or fallback_player,
        video_id)
    return self.url_result(brightcove_url, 'BrightcoveNew', video_id)
|
||||
@@ -40,23 +55,25 @@ class BFMTVIE(BFMTVBaseIE):
|
||||
def _real_extract(self, url):
|
||||
bfmtv_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, bfmtv_id)
|
||||
video_block = extract_attributes(self._search_regex(
|
||||
video = self._extract_video(self._search_regex(
|
||||
self._VIDEO_BLOCK_REGEX, webpage, 'video block'))
|
||||
return self._brightcove_url_result(video_block['videoid'], video_block)
|
||||
if not video:
|
||||
raise ExtractorError('Failed to extract video')
|
||||
return video
|
||||
|
||||
|
||||
class BFMTVLiveIE(BFMTVIE): # XXX: Do not subclass from concrete IE
|
||||
class BFMTVLiveIE(BFMTVBaseIE):
|
||||
IE_NAME = 'bfmtv:live'
|
||||
_VALID_URL = BFMTVBaseIE._VALID_URL_BASE + '(?P<id>(?:[^/]+/)?en-direct)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bfmtv.com/en-direct/',
|
||||
'info_dict': {
|
||||
'id': '5615950982001',
|
||||
'id': '6346069778112',
|
||||
'ext': 'mp4',
|
||||
'title': r're:^le direct BFMTV WEB \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'title': r're:^Le Live BFM TV \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'uploader_id': '876450610001',
|
||||
'upload_date': '20220926',
|
||||
'timestamp': 1664207191,
|
||||
'upload_date': '20240202',
|
||||
'timestamp': 1706887572,
|
||||
'live_status': 'is_live',
|
||||
'thumbnail': r're:https://.+/image\.jpg',
|
||||
'tags': [],
|
||||
@@ -69,6 +86,15 @@ class BFMTVLiveIE(BFMTVIE): # XXX: Do not subclass from concrete IE
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
    """Download the page at ``url`` and return the video found in its video block.

    Raises ExtractorError when ``_extract_video`` yields nothing for the
    block matched by ``_VIDEO_BLOCK_REGEX``.
    """
    bfmtv_id = self._match_id(url)
    webpage = self._download_webpage(url, bfmtv_id)

    video_block = self._search_regex(self._VIDEO_BLOCK_REGEX, webpage, 'video block')
    video = self._extract_video(video_block)
    if video:
        return video
    raise ExtractorError('Failed to extract video')
|
||||
|
||||
|
||||
class BFMTVArticleIE(BFMTVBaseIE):
|
||||
IE_NAME = 'bfmtv:article'
|
||||
@@ -102,18 +128,16 @@ class BFMTVArticleIE(BFMTVBaseIE):
|
||||
},
|
||||
}]
|
||||
|
||||
def _entries(self, webpage):
|
||||
for video_block_el in re.findall(self._VIDEO_BLOCK_REGEX, webpage):
|
||||
video = self._extract_video(video_block_el)
|
||||
if video:
|
||||
yield video
|
||||
|
||||
def _real_extract(self, url):
|
||||
bfmtv_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, bfmtv_id)
|
||||
|
||||
entries = []
|
||||
for video_block_el in re.findall(self._VIDEO_BLOCK_REGEX, webpage):
|
||||
video_block = extract_attributes(video_block_el)
|
||||
video_id = video_block.get('videoid')
|
||||
if not video_id:
|
||||
continue
|
||||
entries.append(self._brightcove_url_result(video_id, video_block))
|
||||
|
||||
return self.playlist_result(
|
||||
entries, bfmtv_id, self._og_search_title(webpage, fatal=False),
|
||||
self._entries(webpage), bfmtv_id, self._og_search_title(webpage, fatal=False),
|
||||
self._html_search_meta(['og:description', 'description'], webpage))
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -50,7 +49,7 @@ class BibelTVBaseIE(InfoExtractor):
|
||||
**traverse_obj(data, {
|
||||
'title': 'title',
|
||||
'description': 'description',
|
||||
'duration': ('duration', {functools.partial(int_or_none, scale=1000)}),
|
||||
'duration': ('duration', {int_or_none(scale=1000)}),
|
||||
'timestamp': ('schedulingStart', {parse_iso8601}),
|
||||
'season_number': 'seasonNumber',
|
||||
'episode_number': 'episodeNumber',
|
||||
|
||||
@@ -109,7 +109,7 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
|
||||
fragments = traverse_obj(play_info, ('durl', lambda _, v: url_or_none(v['url']), {
|
||||
'url': ('url', {url_or_none}),
|
||||
'duration': ('length', {functools.partial(float_or_none, scale=1000)}),
|
||||
'duration': ('length', {float_or_none(scale=1000)}),
|
||||
'filesize': ('size', {int_or_none}),
|
||||
}))
|
||||
if fragments:
|
||||
@@ -124,7 +124,7 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
'quality': ('quality', {int_or_none}),
|
||||
'format_id': ('quality', {str_or_none}),
|
||||
'format_note': ('quality', {lambda x: format_names.get(x)}),
|
||||
'duration': ('timelength', {functools.partial(float_or_none, scale=1000)}),
|
||||
'duration': ('timelength', {float_or_none(scale=1000)}),
|
||||
}),
|
||||
**parse_resolution(format_names.get(play_info.get('quality'))),
|
||||
})
|
||||
@@ -1585,7 +1585,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
|
||||
'title': ('title', {str}),
|
||||
'uploader': ('upper', 'name', {str}),
|
||||
'uploader_id': ('upper', 'mid', {str_or_none}),
|
||||
'timestamp': ('ctime', {int_or_none}, {lambda x: x or None}),
|
||||
'timestamp': ('ctime', {int_or_none}, filter),
|
||||
'thumbnail': ('cover', {url_or_none}),
|
||||
})),
|
||||
}
|
||||
|
||||
388
yt_dlp/extractor/bluesky.py
Normal file
388
yt_dlp/extractor/bluesky.py
Normal file
@@ -0,0 +1,388 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
format_field,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
orderedSet,
|
||||
parse_iso8601,
|
||||
truncate_string,
|
||||
update_url_query,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
variadic,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class BlueskyIE(InfoExtractor):
|
||||
_VALID_URL = [
|
||||
r'https?://(?:www\.)?(?:bsky\.app|main\.bsky\.dev)/profile/(?P<handle>[\w.:%-]+)/post/(?P<id>\w+)',
|
||||
r'at://(?P<handle>[\w.:%-]+)/app\.bsky\.feed\.post/(?P<id>\w+)',
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'https://bsky.app/profile/blu3blue.bsky.social/post/3l4omssdl632g',
|
||||
'md5': '375539c1930ab05d15585ed772ab54fd',
|
||||
'info_dict': {
|
||||
'id': '3l4omssdl632g',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'Blu3Blu3Lilith',
|
||||
'uploader_id': 'blu3blue.bsky.social',
|
||||
'uploader_url': 'https://bsky.app/profile/blu3blue.bsky.social',
|
||||
'channel_id': 'did:plc:pzdr5ylumf7vmvwasrpr5bf2',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:pzdr5ylumf7vmvwasrpr5bf2',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'OMG WE HAVE VIDEOS NOW',
|
||||
'description': 'OMG WE HAVE VIDEOS NOW',
|
||||
'upload_date': '20240921',
|
||||
'timestamp': 1726940605,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': [],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/bsky.app/post/3l3vgf77uco2g',
|
||||
'md5': 'b9e344fdbce9f2852c668a97efefb105',
|
||||
'info_dict': {
|
||||
'id': '3l3vgf77uco2g',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'Bluesky',
|
||||
'uploader_id': 'bsky.app',
|
||||
'uploader_url': 'https://bsky.app/profile/bsky.app',
|
||||
'channel_id': 'did:plc:z72i7hdynmk6r22z27h6tvur',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:z72i7hdynmk6r22z27h6tvur',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'Bluesky now has video! Update your app to versi...',
|
||||
'alt_title': 'Bluesky video feature announcement',
|
||||
'description': r're:(?s)Bluesky now has video! .{239}',
|
||||
'upload_date': '20240911',
|
||||
'timestamp': 1726074716,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': [],
|
||||
'subtitles': {
|
||||
'en': 'mincount:1',
|
||||
},
|
||||
},
|
||||
}, {
|
||||
'url': 'https://main.bsky.dev/profile/souris.moe/post/3l4qhp7bcs52c',
|
||||
'md5': '5f2df8c200b5633eb7fb2c984d29772f',
|
||||
'info_dict': {
|
||||
'id': '3l4qhp7bcs52c',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'souris',
|
||||
'uploader_id': 'souris.moe',
|
||||
'uploader_url': 'https://bsky.app/profile/souris.moe',
|
||||
'channel_id': 'did:plc:tj7g244gl5v6ai6cm4f4wlqp',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:tj7g244gl5v6ai6cm4f4wlqp',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'Bluesky video #3l4qhp7bcs52c',
|
||||
'upload_date': '20240922',
|
||||
'timestamp': 1727003838,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': [],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/de1.pds.tentacle.expert/post/3l3w4tnezek2e',
|
||||
'md5': '1af9c7fda061cf7593bbffca89e43d1c',
|
||||
'info_dict': {
|
||||
'id': '3l3w4tnezek2e',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'clean',
|
||||
'uploader_id': 'de1.pds.tentacle.expert',
|
||||
'uploader_url': 'https://bsky.app/profile/de1.pds.tentacle.expert',
|
||||
'channel_id': 'did:web:de1.tentacle.expert',
|
||||
'channel_url': 'https://bsky.app/profile/did:web:de1.tentacle.expert',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'Bluesky video #3l3w4tnezek2e',
|
||||
'upload_date': '20240911',
|
||||
'timestamp': 1726098823,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': [],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/yunayuispink.bsky.social/post/3l7gqcfes742o',
|
||||
'info_dict': {
|
||||
'id': 'XxK3t_5V3ao',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'yunayu',
|
||||
'uploader_id': '@yunayuispink',
|
||||
'uploader_url': 'https://www.youtube.com/@yunayuispink',
|
||||
'channel': 'yunayu',
|
||||
'channel_id': 'UCPLvXnHa7lTyNoR_dGsU14w',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCPLvXnHa7lTyNoR_dGsU14w',
|
||||
'thumbnail': 'https://i.ytimg.com/vi_webp/XxK3t_5V3ao/maxresdefault.webp',
|
||||
'description': r're:Have a good goodx10000day',
|
||||
'title': '5min vs 5hours drawing',
|
||||
'availability': 'public',
|
||||
'live_status': 'not_live',
|
||||
'playable_in_embed': True,
|
||||
'upload_date': '20241026',
|
||||
'timestamp': 1729967784,
|
||||
'duration': 321,
|
||||
'age_limit': 0,
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'channel_follower_count': int,
|
||||
'categories': ['Entertainment'],
|
||||
'tags': [],
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/endshark.bsky.social/post/3jzxjkcemae2m',
|
||||
'info_dict': {
|
||||
'id': '222792849',
|
||||
'ext': 'mp3',
|
||||
'uploader': 'LASERBAT',
|
||||
'uploader_id': 'laserbatx',
|
||||
'uploader_url': 'https://laserbatx.bandcamp.com',
|
||||
'artists': ['LASERBAT'],
|
||||
'album_artists': ['LASERBAT'],
|
||||
'album': 'Hari Nezumi [EP]',
|
||||
'track': 'Forward to the End',
|
||||
'title': 'LASERBAT - Forward to the End',
|
||||
'thumbnail': 'https://f4.bcbits.com/img/a2507705510_5.jpg',
|
||||
'duration': 228.571,
|
||||
'track_id': '222792849',
|
||||
'release_date': '20230423',
|
||||
'upload_date': '20230423',
|
||||
'timestamp': 1682276040.0,
|
||||
'release_timestamp': 1682276040.0,
|
||||
'track_number': 1,
|
||||
},
|
||||
'add_ie': ['Bandcamp'],
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/dannybhoix.bsky.social/post/3l6oe5mtr2c2j',
|
||||
'md5': 'b9e344fdbce9f2852c668a97efefb105',
|
||||
'info_dict': {
|
||||
'id': '3l3vgf77uco2g',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'Bluesky',
|
||||
'uploader_id': 'bsky.app',
|
||||
'uploader_url': 'https://bsky.app/profile/bsky.app',
|
||||
'channel_id': 'did:plc:z72i7hdynmk6r22z27h6tvur',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:z72i7hdynmk6r22z27h6tvur',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'Bluesky now has video! Update your app to versi...',
|
||||
'alt_title': 'Bluesky video feature announcement',
|
||||
'description': r're:(?s)Bluesky now has video! .{239}',
|
||||
'upload_date': '20240911',
|
||||
'timestamp': 1726074716,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': [],
|
||||
'subtitles': {
|
||||
'en': 'mincount:1',
|
||||
},
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/alt.bun.how/post/3l7rdfxhyds2f',
|
||||
'md5': '8775118b235cf9fa6b5ad30f95cda75c',
|
||||
'info_dict': {
|
||||
'id': '3l7rdfxhyds2f',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'cinnamon',
|
||||
'uploader_id': 'alt.bun.how',
|
||||
'uploader_url': 'https://bsky.app/profile/alt.bun.how',
|
||||
'channel_id': 'did:plc:7x6rtuenkuvxq3zsvffp2ide',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:7x6rtuenkuvxq3zsvffp2ide',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'crazy that i look like this tbh',
|
||||
'description': 'crazy that i look like this tbh',
|
||||
'upload_date': '20241030',
|
||||
'timestamp': 1730332128,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': ['sexual'],
|
||||
'age_limit': 18,
|
||||
},
|
||||
}, {
|
||||
'url': 'at://did:plc:ia76kvnndjutgedggx2ibrem/app.bsky.feed.post/3l6zrz6zyl2dr',
|
||||
'md5': '71b0eb6d85d03145e6af6642c7fc6d78',
|
||||
'info_dict': {
|
||||
'id': '3l6zrz6zyl2dr',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'mary🐇',
|
||||
'uploader_id': 'mary.my.id',
|
||||
'uploader_url': 'https://bsky.app/profile/mary.my.id',
|
||||
'channel_id': 'did:plc:ia76kvnndjutgedggx2ibrem',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:ia76kvnndjutgedggx2ibrem',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'title': 'Bluesky video #3l6zrz6zyl2dr',
|
||||
'upload_date': '20241021',
|
||||
'timestamp': 1729523172,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'tags': [],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/purpleicetea.bsky.social/post/3l7gv55dc2o2w',
|
||||
'info_dict': {
|
||||
'id': '3l7gv55dc2o2w',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': '3l7gv55dc2o2w',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20241026',
|
||||
'description': 'One of my favorite videos',
|
||||
'comment_count': int,
|
||||
'uploader_url': 'https://bsky.app/profile/purpleicetea.bsky.social',
|
||||
'uploader': 'Purple.Ice.Tea',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:bjh5ffwya5f53dfy47dezuwx',
|
||||
'like_count': int,
|
||||
'channel_id': 'did:plc:bjh5ffwya5f53dfy47dezuwx',
|
||||
'repost_count': int,
|
||||
'timestamp': 1729973202,
|
||||
'tags': [],
|
||||
'uploader_id': 'purpleicetea.bsky.social',
|
||||
'title': 'One of my favorite videos',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': '3l77u64l7le2e',
|
||||
'ext': 'mp4',
|
||||
'title': 'hearing people on twitter say that bluesky isn\'...',
|
||||
'like_count': int,
|
||||
'uploader_id': 'thafnine.net',
|
||||
'uploader_url': 'https://bsky.app/profile/thafnine.net',
|
||||
'upload_date': '20241024',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:6ttyq36rhiyed7wu3ws7dmqj',
|
||||
'description': r're:(?s)hearing people on twitter say that bluesky .{93}',
|
||||
'tags': [],
|
||||
'alt_title': 'md5:9b1ee1937fb3d1a81e932f9ec14d560e',
|
||||
'uploader': 'T9',
|
||||
'channel_id': 'did:plc:6ttyq36rhiyed7wu3ws7dmqj',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
'timestamp': 1729731642,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
}],
|
||||
}]
|
||||
_BLOB_URL_TMPL = '{}/xrpc/com.atproto.sync.getBlob'
|
||||
|
||||
def _get_service_endpoint(self, did, video_id):
    """Look up the service endpoint URL that hosts blobs for the given DID.

    DIDs starting with 'did:web:' are resolved through the
    '/.well-known/did.json' document of the host named after that prefix;
    every other DID is resolved through plc.directory. The download is
    non-fatal: when no 'AtprotoPersonalDataServer' service with a valid
    'serviceEndpoint' URL is found, 'https://bsky.social' is returned.
    """
    web_prefix = 'did:web:'
    if did.startswith(web_prefix):
        did_doc_url = f'https://{did[len(web_prefix):]}/.well-known/did.json'
    else:
        did_doc_url = f'https://plc.directory/{did}'

    did_doc = self._download_json(
        did_doc_url, video_id, 'Fetching service endpoint', 'Falling back to bsky.social', fatal=False)

    # Take the first matching service entry whose endpoint is a usable URL
    pds_endpoint = traverse_obj(did_doc, (
        'service', lambda _, service: service['type'] == 'AtprotoPersonalDataServer',
        'serviceEndpoint', {url_or_none}, any))
    return pds_endpoint or 'https://bsky.social'
|
||||
|
||||
def _real_extract(self, url):
    """Extract the video entries of a single Bluesky post.

    Fetches the post through the public getPostThread endpoint and gathers
    entries from its direct embed, its record-with-media embed and, when
    present, the quoted (nested) record. Returns the lone entry when exactly
    one was found, otherwise a playlist of all entries. Raises an expected
    ExtractorError when the post yields no entries at all.
    """
    handle, video_id = self._match_valid_url(url).group('handle', 'id')

    thread_data = self._download_json(
        'https://public.api.bsky.app/xrpc/app.bsky.feed.getPostThread',
        video_id, query={
            'uri': f'at://{handle}/app.bsky.feed.post/{video_id}',
            'depth': 0,
            'parentHeight': 0,
        })
    post = thread_data['thread']['post']

    entries = [
        # app.bsky.embed.video.view/app.bsky.embed.external.view
        *self._extract_videos(post, video_id),
        # app.bsky.embed.recordWithMedia.view
        *self._extract_videos(
            post, video_id, embed_path=('embed', 'media'), record_subpath=('embed', 'media')),
    ]

    # app.bsky.embed.record.view
    nested_post = traverse_obj(post, ('embed', 'record', ('record', None), {dict}, any))
    if nested_post:
        entries.extend(self._extract_videos(
            nested_post, video_id, embed_path=('embeds', 0), record_path='value'))

    if not entries:
        raise ExtractorError('No video could be found in this post', expected=True)
    if len(entries) == 1:
        return entries[0]
    return self.playlist_result(entries, video_id)
|
||||
|
||||
@staticmethod
def _build_profile_url(path):
    """Format `path` into a bsky.app profile URL via format_field, with None as the fallback value."""
    return format_field(
        path, field=None, template='https://bsky.app/profile/%s', default=None)
|
||||
|
||||
def _extract_videos(self, root, video_id, embed_path='embed', record_path='record', record_subpath='embed'):
    """Build the list of entries found in one post-like object.

    @param root             Object to read the embed, record and author data from
    @param video_id         ID used for downloads and as the default entry ID
    @param embed_path       Traversal path (single key or sequence) to the embed view
    @param record_path      Traversal path (single key or sequence) to the record
    @param record_subpath   Path, relative to record_path, to the record's embed
    @returns                A list holding an optional url_result for an external
                            link, followed by an optional entry for the HLS playlist.
                            Without a valid playlist URL only the external link
                            (if any) is returned.
    """
    path_key_types = (str, bytes, dict, set)
    embed_path = variadic(embed_path, path_key_types)
    record_path = variadic(record_path, path_key_types)
    record_subpath = variadic(record_subpath, path_key_types)
    record_embed_path = (*record_path, *record_subpath)

    entries = []
    external_uri = traverse_obj(root, (
        (record_embed_path, embed_path), 'external', 'uri', {url_or_none}, any))
    if external_uri:
        entries.append(self.url_result(external_uri))

    playlist = traverse_obj(root, (*embed_path, 'playlist', {url_or_none}))
    if not playlist:
        return entries

    formats, subtitles = self._extract_m3u8_formats_and_subtitles(
        playlist, video_id, 'mp4', m3u8_id='hls', fatal=False)

    video_cid = traverse_obj(
        root, (*embed_path, 'cid', {str}),
        (*record_embed_path, 'video', 'ref', '$link', {str}))
    did = traverse_obj(root, ('author', 'did', {str}))

    if did and video_cid:
        endpoint = self._get_service_endpoint(did, video_id)

        def blob_url(cid):
            # getBlob URL on the resolved endpoint for the given blob CID
            return update_url_query(
                self._BLOB_URL_TMPL.format(endpoint), {'did': did, 'cid': cid})

        formats.append({
            'format_id': 'blob',
            'url': blob_url(video_cid),
            **traverse_obj(root, (*embed_path, 'aspectRatio', {
                'width': ('width', {int_or_none}),
                'height': ('height', {int_or_none}),
            })),
            **traverse_obj(root, (*record_embed_path, 'video', {
                'filesize': ('size', {int_or_none}),
                'ext': ('mimeType', {mimetype2ext}),
            })),
        })

        # Captions are blobs too, so they are only added when the endpoint is known
        for caption in traverse_obj(root, (
                *record_embed_path, 'captions', lambda _, v: v['file']['ref']['$link'])):
            lang = caption.get('lang') or 'und'
            subtitles.setdefault(lang, []).append({
                'url': blob_url(caption['file']['ref']['$link']),
                'ext': traverse_obj(caption, ('file', 'mimeType', {mimetype2ext})),
            })

    metadata = traverse_obj(root, {
        'id': ('uri', {url_basename}),
        'thumbnail': (*embed_path, 'thumbnail', {url_or_none}),
        'alt_title': (*embed_path, 'alt', {str}, filter),
        'uploader': ('author', 'displayName', {str}),
        'uploader_id': ('author', 'handle', {str}),
        'uploader_url': ('author', 'handle', {self._build_profile_url}),
        'channel_id': ('author', 'did', {str}),
        'channel_url': ('author', 'did', {self._build_profile_url}),
        'like_count': ('likeCount', {int_or_none}),
        'repost_count': ('repostCount', {int_or_none}),
        'comment_count': ('replyCount', {int_or_none}),
        'timestamp': ('indexedAt', {parse_iso8601}),
        'tags': ('labels', ..., 'val', {str}, all, {orderedSet}),
        'age_limit': (
            'labels', ..., 'val', {lambda x: 18 if x in ('sexual', 'porn', 'graphic-media') else None}, any),
        'description': (*record_path, 'text', {str}, filter),
        'title': (*record_path, 'text', {lambda x: x.replace('\n', ' ')}, {truncate_string(left=50)}),
    })
    entries.append({
        # Default ID; replaced by the URI basename when the object has a 'uri'
        'id': video_id,
        'formats': formats,
        'subtitles': subtitles,
        **metadata,
    })
    return entries
|
||||
@@ -1,35 +1,20 @@
|
||||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_element_text_and_html_by_tag,
|
||||
get_elements_by_class,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
variadic,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
def html_get_element(tag=None, cls=None):
    """Create a one-argument HTML lookup function bound to a tag and/or class.

    When `cls` is given the lookup uses get_elements_by_class (passing `tag`
    along as a keyword argument), otherwise it uses
    get_element_text_and_html_by_tag. The returned callable takes the HTML and
    yields item 0 of the variadic-wrapped lookup result.
    """
    assert tag or cls, 'One of tag or class is required'

    lookup = (
        functools.partial(get_elements_by_class, cls, tag=tag) if cls
        else functools.partial(get_element_text_and_html_by_tag, tag))

    def html_get_element_wrapper(html):
        found = variadic(lookup(html))
        return found[0]

    return html_get_element_wrapper
|
||||
from ..utils.traversal import (
|
||||
find_element,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
class BpbIE(InfoExtractor):
|
||||
@@ -41,12 +26,12 @@ class BpbIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '297',
|
||||
'ext': 'mp4',
|
||||
'creator': 'Kooperative Berlin',
|
||||
'description': 'md5:f4f75885ba009d3e2b156247a8941ce6',
|
||||
'release_date': '20160115',
|
||||
'creators': ['Kooperative Berlin'],
|
||||
'description': r're:Joachim Gauck, .*\n\nKamera: .*',
|
||||
'release_date': '20150716',
|
||||
'series': 'Interview auf dem Geschichtsforum 1989 | 2009',
|
||||
'tags': ['Friedliche Revolution', 'Erinnerungskultur', 'Vergangenheitspolitik', 'DDR 1949 - 1990', 'Freiheitsrecht', 'BStU', 'Deutschland'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/7/297_teaser_16x9_1240.jpg?8839D',
|
||||
'tags': [],
|
||||
'thumbnail': r're:https?://www\.bpb\.de/cache/images/7/297_teaser_16x9_1240\.jpg.*',
|
||||
'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
@@ -55,11 +40,12 @@ class BpbIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '522184',
|
||||
'ext': 'mp4',
|
||||
'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)',
|
||||
'creators': ['Institute for Strategic Dialogue Germany gGmbH (ISD)'],
|
||||
'description': 'md5:f83c795ff8f825a69456a9e51fc15903',
|
||||
'release_date': '20230621',
|
||||
'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/4/522184_teaser_16x9_1240.png?EABFB',
|
||||
'series': 'Narrative über den Krieg Russlands gegen die Ukraine (NUK)',
|
||||
'tags': [],
|
||||
'thumbnail': r're:https://www\.bpb\.de/cache/images/4/522184_teaser_16x9_1240\.png.*',
|
||||
'title': 'md5:9b01ccdbf58dbf9e5c9f6e771a803b1c',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
@@ -68,11 +54,12 @@ class BpbIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '518789',
|
||||
'ext': 'mp4',
|
||||
'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)',
|
||||
'creators': ['Institute for Strategic Dialogue Germany gGmbH (ISD)'],
|
||||
'description': 'md5:85228aed433e84ff0ff9bc582abd4ea8',
|
||||
'release_date': '20230302',
|
||||
'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/9/518789_teaser_16x9_1240.jpeg?56D0D',
|
||||
'series': 'Narrative über den Krieg Russlands gegen die Ukraine (NUK)',
|
||||
'tags': [],
|
||||
'thumbnail': r're:https://www\.bpb\.de/cache/images/9/518789_teaser_16x9_1240\.jpeg.*',
|
||||
'title': 'md5:3e956f264bb501f6383f10495a401da4',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
@@ -84,12 +71,12 @@ class BpbIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '315813',
|
||||
'ext': 'mp3',
|
||||
'creator': 'Axel Schröder',
|
||||
'creators': ['Axel Schröder'],
|
||||
'description': 'md5:eda9d1af34e5912efef5baf54fba4427',
|
||||
'release_date': '20200921',
|
||||
'series': 'Auf Endlagersuche. Der deutsche Weg zu einem sicheren Atommülllager',
|
||||
'tags': ['Atomenergie', 'Endlager', 'hoch-radioaktiver Abfall', 'Endlagersuche', 'Atommüll', 'Atomendlager', 'Gorleben', 'Deutschland'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/3/315813_teaser_16x9_1240.png?92A94',
|
||||
'thumbnail': r're:https://www\.bpb\.de/cache/images/3/315813_teaser_16x9_1240\.png.*',
|
||||
'title': 'Folge 1: Eine Einführung',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
@@ -98,12 +85,12 @@ class BpbIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '517806',
|
||||
'ext': 'mp3',
|
||||
'creator': 'Bundeszentrale für politische Bildung',
|
||||
'creators': ['Bundeszentrale für politische Bildung'],
|
||||
'description': 'md5:594689600e919912aade0b2871cc3fed',
|
||||
'release_date': '20230127',
|
||||
'series': 'Vorträge des Fachtags "Modernisierer. Grenzgänger. Anstifter. Sechs Jahrzehnte \'Neue Rechte\'"',
|
||||
'tags': ['Rechtsextremismus', 'Konservatismus', 'Konservativismus', 'neue Rechte', 'Rechtspopulismus', 'Schnellroda', 'Deutschland'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/6/517806_teaser_16x9_1240.png?7A7A0',
|
||||
'thumbnail': r're:https://www\.bpb\.de/cache/images/6/517806_teaser_16x9_1240\.png.*',
|
||||
'title': 'Die Weltanschauung der "Neuen Rechten"',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
@@ -147,7 +134,7 @@ class BpbIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title_result = traverse_obj(webpage, ({html_get_element(cls='opening-header__title')}, {self._TITLE_RE.match}))
|
||||
title_result = traverse_obj(webpage, ({find_element(cls='opening-header__title')}, {self._TITLE_RE.match}))
|
||||
json_lds = list(self._yield_json_ld(webpage, video_id, fatal=False))
|
||||
|
||||
return {
|
||||
@@ -156,15 +143,15 @@ class BpbIE(InfoExtractor):
|
||||
# This metadata could be interpreted otherwise, but it fits "series" the most
|
||||
'series': traverse_obj(title_result, ('series', {str.strip})) or None,
|
||||
'description': join_nonempty(*traverse_obj(webpage, [(
|
||||
{html_get_element(cls='opening-intro')},
|
||||
[{html_get_element(tag='bpb-accordion-item')}, {html_get_element(cls='text-content')}],
|
||||
{find_element(cls='opening-intro')},
|
||||
[{find_element(tag='bpb-accordion-item')}, {find_element(cls='text-content')}],
|
||||
), {clean_html}]), delim='\n\n') or None,
|
||||
'creator': self._html_search_meta('author', webpage),
|
||||
'creators': traverse_obj(self._html_search_meta('author', webpage), all),
|
||||
'uploader': self._html_search_meta('publisher', webpage),
|
||||
'release_date': unified_strdate(self._html_search_meta('date', webpage)),
|
||||
'tags': traverse_obj(json_lds, (..., 'keywords', {lambda x: x.split(',')}, ...)),
|
||||
**traverse_obj(self._parse_vue_attributes('bpb-player', webpage, video_id), {
|
||||
'formats': (':sources', ..., {self._process_source}),
|
||||
'thumbnail': ('poster', {lambda x: urljoin(url, x)}),
|
||||
'thumbnail': ('poster', {urljoin(url)}),
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -145,10 +145,9 @@ class BravoTVIE(AdobePassIE):
|
||||
tp_metadata = self._download_json(
|
||||
update_url_query(tp_url, {'format': 'preview'}), video_id, fatal=False)
|
||||
|
||||
seconds_or_none = lambda x: float_or_none(x, 1000)
|
||||
chapters = traverse_obj(tp_metadata, ('chapters', ..., {
|
||||
'start_time': ('startTime', {seconds_or_none}),
|
||||
'end_time': ('endTime', {seconds_or_none}),
|
||||
'start_time': ('startTime', {float_or_none(scale=1000)}),
|
||||
'end_time': ('endTime', {float_or_none(scale=1000)}),
|
||||
}))
|
||||
# prune pointless single chapters that span the entire duration from short videos
|
||||
if len(chapters) == 1 and not traverse_obj(chapters, (0, 'end_time')):
|
||||
@@ -168,8 +167,8 @@ class BravoTVIE(AdobePassIE):
|
||||
**merge_dicts(traverse_obj(tp_metadata, {
|
||||
'title': 'title',
|
||||
'description': 'description',
|
||||
'duration': ('duration', {seconds_or_none}),
|
||||
'timestamp': ('pubDate', {seconds_or_none}),
|
||||
'duration': ('duration', {float_or_none(scale=1000)}),
|
||||
'timestamp': ('pubDate', {float_or_none(scale=1000)}),
|
||||
'season_number': (('pl1$seasonNumber', 'nbcu$seasonNumber'), {int_or_none}),
|
||||
'episode_number': (('pl1$episodeNumber', 'nbcu$episodeNumber'), {int_or_none}),
|
||||
'series': (('pl1$show', 'nbcu$show'), (None, ...), {str}),
|
||||
|
||||
@@ -8,11 +8,13 @@ from ..utils import (
|
||||
bug_reports_message,
|
||||
clean_html,
|
||||
format_field,
|
||||
get_element_text_and_html_by_tag,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
from ..utils.traversal import (
|
||||
find_element,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
class BundestagIE(InfoExtractor):
|
||||
@@ -115,9 +117,8 @@ class BundestagIE(InfoExtractor):
|
||||
note='Downloading metadata overlay', fatal=False,
|
||||
), {
|
||||
'title': (
|
||||
{functools.partial(get_element_text_and_html_by_tag, 'h3')}, 0,
|
||||
{functools.partial(re.sub, r'<span[^>]*>[^<]+</span>', '')}, {clean_html}),
|
||||
'description': ({functools.partial(get_element_text_and_html_by_tag, 'p')}, 0, {clean_html}),
|
||||
{find_element(tag='h3')}, {functools.partial(re.sub, r'<span[^>]*>[^<]+</span>', '')}, {clean_html}),
|
||||
'description': ({find_element(tag='p')}, {clean_html}),
|
||||
}))
|
||||
|
||||
return result
|
||||
|
||||
@@ -53,7 +53,7 @@ class CaffeineTVIE(InfoExtractor):
|
||||
'like_count': ('like_count', {int_or_none}),
|
||||
'view_count': ('view_count', {int_or_none}),
|
||||
'comment_count': ('comment_count', {int_or_none}),
|
||||
'tags': ('tags', ..., {str}, {lambda x: x or None}),
|
||||
'tags': ('tags', ..., {str}, filter),
|
||||
'uploader': ('user', 'name', {str}),
|
||||
'uploader_id': (((None, 'user'), 'username'), {str}, any),
|
||||
'is_live': ('is_live', {bool}),
|
||||
@@ -62,7 +62,7 @@ class CaffeineTVIE(InfoExtractor):
|
||||
'title': ('broadcast_title', {str}),
|
||||
'duration': ('content_duration', {int_or_none}),
|
||||
'timestamp': ('broadcast_start_time', {parse_iso8601}),
|
||||
'thumbnail': ('preview_image_path', {lambda x: urljoin(url, x)}),
|
||||
'thumbnail': ('preview_image_path', {urljoin(url)}),
|
||||
}),
|
||||
'age_limit': {
|
||||
# assume Apple Store ratings: https://en.wikipedia.org/wiki/Mobile_software_content_rating_system
|
||||
|
||||
@@ -453,8 +453,8 @@ class CBCPlayerIE(InfoExtractor):
|
||||
|
||||
chapters = traverse_obj(data, (
|
||||
'media', 'chapters', lambda _, v: float(v['startTime']) is not None, {
|
||||
'start_time': ('startTime', {functools.partial(float_or_none, scale=1000)}),
|
||||
'end_time': ('endTime', {functools.partial(float_or_none, scale=1000)}),
|
||||
'start_time': ('startTime', {float_or_none(scale=1000)}),
|
||||
'end_time': ('endTime', {float_or_none(scale=1000)}),
|
||||
'title': ('name', {str}),
|
||||
}))
|
||||
# Filter out pointless single chapters with start_time==0 and no end_time
|
||||
@@ -465,8 +465,8 @@ class CBCPlayerIE(InfoExtractor):
|
||||
**traverse_obj(data, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str.strip}),
|
||||
'thumbnail': ('image', 'url', {url_or_none}, {functools.partial(update_url, query=None)}),
|
||||
'timestamp': ('publishedAt', {functools.partial(float_or_none, scale=1000)}),
|
||||
'thumbnail': ('image', 'url', {url_or_none}, {update_url(query=None)}),
|
||||
'timestamp': ('publishedAt', {float_or_none(scale=1000)}),
|
||||
'media_type': ('media', 'clipType', {str}),
|
||||
'series': ('showName', {str}),
|
||||
'season_number': ('media', 'season', {int_or_none}),
|
||||
|
||||
@@ -96,7 +96,7 @@ class CBSNewsBaseIE(InfoExtractor):
|
||||
**traverse_obj(item, {
|
||||
'title': (None, ('fulltitle', 'title')),
|
||||
'description': 'dek',
|
||||
'timestamp': ('timestamp', {lambda x: float_or_none(x, 1000)}),
|
||||
'timestamp': ('timestamp', {float_or_none(scale=1000)}),
|
||||
'duration': ('duration', {float_or_none}),
|
||||
'subtitles': ('captions', {get_subtitles}),
|
||||
'thumbnail': ('images', ('hd', 'sd'), {url_or_none}),
|
||||
|
||||
@@ -12,53 +12,86 @@ from ..utils import (
|
||||
|
||||
|
||||
class CCMAIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?P<type>video|audio)/(?P<id>\d+)'
|
||||
IE_DESC = '3Cat, TV3 and Catalunya Ràdio'
|
||||
_VALID_URL = r'https?://(?:www\.)?3cat\.cat/(?:3cat|tv3/sx3)/[^/?#]+/(?P<type>video|audio)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/',
|
||||
# ccma.cat/tv3/alacarta/ URLs redirect to 3cat.cat/3cat/
|
||||
'url': 'https://www.3cat.cat/3cat/lespot-de-la-marato-de-tv3/video/5630208/',
|
||||
'md5': '7296ca43977c8ea4469e719c609b0871',
|
||||
'info_dict': {
|
||||
'id': '5630208',
|
||||
'ext': 'mp4',
|
||||
'title': 'L\'espot de La Marató de TV3',
|
||||
'title': 'L\'espot de La Marató 2016: Ictus i les lesions medul·lars i cerebrals traumàtiques',
|
||||
'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
|
||||
'timestamp': 1478608140,
|
||||
'upload_date': '20161108',
|
||||
'age_limit': 0,
|
||||
'alt_title': 'EsportMarató2016WEB_PerPublicar',
|
||||
'duration': 79,
|
||||
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/4/6/1478536106664.jpg',
|
||||
'series': 'Dedicada a l\'ictus i les lesions medul·lars i cerebrals traumàtiques',
|
||||
'categories': ['Divulgació'],
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
|
||||
# ccma.cat/catradio/alacarta/ URLs redirect to 3cat.cat/3cat/
|
||||
'url': 'https://www.3cat.cat/3cat/el-consell-de-savis-analitza-el-derbi/audio/943685/',
|
||||
'md5': 'fa3e38f269329a278271276330261425',
|
||||
'info_dict': {
|
||||
'id': '943685',
|
||||
'ext': 'mp3',
|
||||
'title': 'El Consell de Savis analitza el derbi',
|
||||
'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
|
||||
'upload_date': '20170512',
|
||||
'timestamp': 1494622500,
|
||||
'upload_date': '20161217',
|
||||
'timestamp': 1482011700,
|
||||
'vcodec': 'none',
|
||||
'categories': ['Esports'],
|
||||
'series': 'Tot gira',
|
||||
'duration': 821,
|
||||
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/8/9/1482002602598.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/',
|
||||
'md5': 'b43c3d3486f430f3032b5b160d80cbc3',
|
||||
'url': 'https://www.3cat.cat/3cat/crims-josep-tallada-lespereu-me-part-1/video/6031387/',
|
||||
'md5': '27493513d08a3e5605814aee9bb778d2',
|
||||
'info_dict': {
|
||||
'id': '6031387',
|
||||
'ext': 'mp4',
|
||||
'title': 'Crims - Josep Talleda, l\'"Espereu-me" (capítol 1)',
|
||||
'title': 'T1xC5 - Josep Talleda, l\'"Espereu-me" (part 1)',
|
||||
'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60',
|
||||
'timestamp': 1582577700,
|
||||
'timestamp': 1582577919,
|
||||
'upload_date': '20200224',
|
||||
'subtitles': 'mincount:4',
|
||||
'age_limit': 16,
|
||||
'subtitles': 'mincount:1',
|
||||
'age_limit': 13,
|
||||
'series': 'Crims',
|
||||
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/1/9/1582564376991.jpg',
|
||||
'duration': 3203,
|
||||
'categories': ['Divulgació'],
|
||||
'alt_title': 'Crims - 5 - Josep Talleda, l\'"Espereu-me" (1a part) - Josep Talleda, l\'"Espereu-me" (part 1)',
|
||||
'episode_number': 5,
|
||||
'episode': 'Episode 5',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.3cat.cat/tv3/sx3/una-mosca-volava-per-la-llum/video/5759227/',
|
||||
'info_dict': {
|
||||
'id': '5759227',
|
||||
'ext': 'mp4',
|
||||
'title': 'Una mosca volava per la llum',
|
||||
'alt_title': '17Z004Ç UNA MOSCA VOLAVA PER LA LLUM',
|
||||
'description': 'md5:9ab64276944b0825336f4147f13f7854',
|
||||
'series': 'Mic',
|
||||
'upload_date': '20180411',
|
||||
'timestamp': 1523440105,
|
||||
'duration': 160,
|
||||
'age_limit': 0,
|
||||
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/6/1/1524071667216.jpg',
|
||||
'categories': ['Música'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
media_type, media_id = self._match_valid_url(url).groups()
|
||||
media_type, media_id = self._match_valid_url(url).group('type', 'id')
|
||||
|
||||
media = self._download_json(
|
||||
'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
|
||||
'http://api-media.3cat.cat/pvideo/media.jsp', media_id, query={
|
||||
'media': media_type,
|
||||
'idint': media_id,
|
||||
'format': 'dm',
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
UserNotLive,
|
||||
@@ -77,7 +75,7 @@ class CHZZKLiveIE(InfoExtractor):
|
||||
'thumbnails': thumbnails,
|
||||
**traverse_obj(live_detail, {
|
||||
'title': ('liveTitle', {str}),
|
||||
'timestamp': ('openDate', {functools.partial(parse_iso8601, delimiter=' ')}),
|
||||
'timestamp': ('openDate', {parse_iso8601(delimiter=' ')}),
|
||||
'concurrent_view_count': ('concurrentUserCount', {int_or_none}),
|
||||
'view_count': ('accumulateCount', {int_or_none}),
|
||||
'channel': ('channel', 'channelName', {str}),
|
||||
@@ -146,23 +144,37 @@ class CHZZKVideoIE(InfoExtractor):
|
||||
video_meta = self._download_json(
|
||||
f'https://api.chzzk.naver.com/service/v3/videos/{video_id}', video_id,
|
||||
note='Downloading video info', errnote='Unable to download video info')['content']
|
||||
formats, subtitles = self._extract_mpd_formats_and_subtitles(
|
||||
f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{video_meta["videoId"]}', video_id,
|
||||
query={
|
||||
'key': video_meta['inKey'],
|
||||
'env': 'real',
|
||||
'lc': 'en_US',
|
||||
'cpl': 'en_US',
|
||||
}, note='Downloading video playback', errnote='Unable to download video playback')
|
||||
|
||||
live_status = 'was_live' if video_meta.get('liveOpenDate') else 'not_live'
|
||||
video_status = video_meta.get('vodStatus')
|
||||
if video_status == 'UPLOAD':
|
||||
playback = self._parse_json(video_meta['liveRewindPlaybackJson'], video_id)
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
playback['media'][0]['path'], video_id, 'mp4', m3u8_id='hls')
|
||||
elif video_status == 'ABR_HLS':
|
||||
formats, subtitles = self._extract_mpd_formats_and_subtitles(
|
||||
f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{video_meta["videoId"]}',
|
||||
video_id, query={
|
||||
'key': video_meta['inKey'],
|
||||
'env': 'real',
|
||||
'lc': 'en_US',
|
||||
'cpl': 'en_US',
|
||||
})
|
||||
else:
|
||||
self.raise_no_formats(
|
||||
f'Unknown video status detected: "{video_status}"', expected=True, video_id=video_id)
|
||||
formats, subtitles = [], {}
|
||||
live_status = 'post_live' if live_status == 'was_live' else None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'live_status': live_status,
|
||||
**traverse_obj(video_meta, {
|
||||
'title': ('videoTitle', {str}),
|
||||
'thumbnail': ('thumbnailImageUrl', {url_or_none}),
|
||||
'timestamp': ('publishDateAt', {functools.partial(float_or_none, scale=1000)}),
|
||||
'timestamp': ('publishDateAt', {float_or_none(scale=1000)}),
|
||||
'view_count': ('readCount', {int_or_none}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'channel': ('channel', 'channelName', {str}),
|
||||
|
||||
@@ -3,6 +3,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
filter_dict,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
smuggle_url,
|
||||
@@ -85,7 +86,7 @@ class CineverseIE(CineverseBaseIE):
|
||||
'title': 'title',
|
||||
'id': ('details', 'item_id'),
|
||||
'description': ('details', 'description'),
|
||||
'duration': ('duration', {lambda x: x / 1000}),
|
||||
'duration': ('duration', {float_or_none(scale=1000)}),
|
||||
'cast': ('details', 'cast', {lambda x: x.split(', ')}),
|
||||
'modified_timestamp': ('details', 'updated_by', 0, 'update_time', 'time', {int_or_none}),
|
||||
'season_number': ('details', 'season', {int_or_none}),
|
||||
|
||||
@@ -1,146 +1,225 @@
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .turner import TurnerBaseIE
|
||||
from ..utils import merge_dicts, try_call, url_basename
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
parse_resolution,
|
||||
try_call,
|
||||
update_url,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import find_elements, traverse_obj
|
||||
|
||||
|
||||
class CNNIE(TurnerBaseIE):
|
||||
_VALID_URL = r'''(?x)https?://(?:(?P<sub_domain>edition|www|money)\.)?cnn\.com/(?:video/(?:data/.+?|\?)/)?videos?/
|
||||
(?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))'''
|
||||
class CNNIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:edition|www|money|cnnespanol)\.)?cnn\.com/(?!audio/)(?P<display_id>[^?#]+?)(?:[?#]|$|/index\.html)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
|
||||
'md5': '3e6121ea48df7e2259fe73a0628605c4',
|
||||
'url': 'https://www.cnn.com/2024/05/31/sport/video/jadon-sancho-borussia-dortmund-champions-league-exclusive-spt-intl',
|
||||
'info_dict': {
|
||||
'id': 'sports/2013/06/09/nadal-1-on-1.cnn',
|
||||
'id': 'med0e97ad0d154f56e29aa96e57192a14226734b6b',
|
||||
'display_id': '2024/05/31/sport/video/jadon-sancho-borussia-dortmund-champions-league-exclusive-spt-intl',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nadal wins 8th French Open title',
|
||||
'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
|
||||
'duration': 135,
|
||||
'upload_date': '20130609',
|
||||
'upload_date': '20240531',
|
||||
'description': 'md5:844bcdb0629e1877a7a466c913f4c19c',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/gettyimages-2151936122.jpg?c=original',
|
||||
'duration': 373.0,
|
||||
'timestamp': 1717148586,
|
||||
'title': 'Borussia Dortmund star Jadon Sancho seeks Wembley redemption after 2020 Euros hurt',
|
||||
'modified_date': '20240531',
|
||||
'modified_timestamp': 1717150140,
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29',
|
||||
'md5': 'b5cc60c60a3477d185af8f19a2a26f4e',
|
||||
'url': 'https://edition.cnn.com/2024/06/11/politics/video/inmates-vote-jail-nevada-murray-dnt-ac360-digvid',
|
||||
'info_dict': {
|
||||
'id': 'us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology',
|
||||
'id': 'me522945c4709b299e5cb8657900a7a21ad3b559f9',
|
||||
'display_id': '2024/06/11/politics/video/inmates-vote-jail-nevada-murray-dnt-ac360-digvid',
|
||||
'ext': 'mp4',
|
||||
'title': "Student's epic speech stuns new freshmen",
|
||||
'description': 'A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from "2001: A Space Odyssey."',
|
||||
'upload_date': '20130821',
|
||||
'description': 'md5:e0120fe5da9ad8259fd707c1cbb64a60',
|
||||
'title': 'Here’s how some inmates in closely divided state are now able to vote from jail',
|
||||
'timestamp': 1718158269,
|
||||
'upload_date': '20240612',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/still-20701554-13565-571-still.jpg?c=original',
|
||||
'duration': 202.0,
|
||||
'modified_date': '20240612',
|
||||
'modified_timestamp': 1718158509,
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html',
|
||||
'md5': 'f14d02ebd264df951feb2400e2c25a1b',
|
||||
'url': 'https://edition.cnn.com/2024/06/11/style/king-charles-portrait-vandalized/index.html',
|
||||
'info_dict': {
|
||||
'id': 'living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln',
|
||||
'id': 'mef5f52b9e1fe28b1ad192afcbc9206ae984894b68',
|
||||
'display_id': '2024/06/11/style/king-charles-portrait-vandalized',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nashville Ep. 1: Hand crafted skateboards',
|
||||
'description': 'md5:e7223a503315c9f150acac52e76de086',
|
||||
'upload_date': '20141222',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/still-20701257-8846-816-still.jpg?c=original',
|
||||
'description': 'md5:19f78338ccec533db0fa8a4511012dae',
|
||||
'title': 'Video shows King Charles\' portrait being vandalized by activists',
|
||||
'timestamp': 1718113852,
|
||||
'upload_date': '20240611',
|
||||
'duration': 51.0,
|
||||
'modified_timestamp': 1718116193,
|
||||
'modified_date': '20240611',
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://money.cnn.com/video/news/2016/08/19/netflix-stunning-stats.cnnmoney/index.html',
|
||||
'md5': '52a515dc1b0f001cd82e4ceda32be9d1',
|
||||
'url': 'https://edition.cnn.com/videos/media/2022/12/05/robin-meade-final-sign-off-broadcast-hln-mxp-contd-vpx.hln',
|
||||
'info_dict': {
|
||||
'id': '/video/news/2016/08/19/netflix-stunning-stats.cnnmoney',
|
||||
'id': 'mefba13799201b084ea3b1d0f7ca820ae94d4bb5b2',
|
||||
'display_id': 'videos/media/2022/12/05/robin-meade-final-sign-off-broadcast-hln-mxp-contd-vpx.hln',
|
||||
'ext': 'mp4',
|
||||
'title': '5 stunning stats about Netflix',
|
||||
'description': 'Did you know that Netflix has more than 80 million members? Here are five facts about the online video distributor that you probably didn\'t know.',
|
||||
'upload_date': '20160819',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/221205163510-robin-meade-sign-off.jpg?c=original',
|
||||
'duration': 158.0,
|
||||
'title': 'Robin Meade signs off after HLN\'s last broadcast',
|
||||
'description': 'md5:cff3c62d18d2fbc6c5c75cb029b7353b',
|
||||
'upload_date': '20221205',
|
||||
'timestamp': 1670284296,
|
||||
'modified_timestamp': 1670332404,
|
||||
'modified_date': '20221206',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
'params': {'format': 'direct'},
|
||||
}, {
|
||||
'url': 'https://cnnespanol.cnn.com/video/ataque-misil-israel-beirut-libano-octubre-trax',
|
||||
'info_dict': {
|
||||
'id': 'me484a43722642aa00627b812fe928f2e99c6e2997',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'video/ataque-misil-israel-beirut-libano-octubre-trax',
|
||||
'timestamp': 1729501452,
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/ataqeubeirut-1.jpg?c=original',
|
||||
'description': 'md5:256ee7137d161f776cda429654135e52',
|
||||
'upload_date': '20241021',
|
||||
'duration': 31.0,
|
||||
'title': 'VIDEO | Israel lanza un nuevo ataque sobre Beirut',
|
||||
'modified_date': '20241021',
|
||||
'modified_timestamp': 1729501530,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://edition.cnn.com/videos/arts/2016/04/21/olympic-games-cultural-a-z-brazil.cnn',
|
||||
'only_matching': True,
|
||||
'url': 'https://edition.cnn.com/2024/10/16/politics/kamala-harris-fox-news-interview/index.html',
|
||||
'info_dict': {
|
||||
'id': '2024/10/16/politics/kamala-harris-fox-news-interview',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'playlist': [{
|
||||
'md5': '073ffab87b8bef97c9913e71cc18ef9e',
|
||||
'info_dict': {
|
||||
'id': 'me19d548fdd54df0924087039283128ef473ab397d',
|
||||
'ext': 'mp4',
|
||||
'title': '\'I\'m not finished\': Harris interview with Fox News gets heated',
|
||||
'display_id': 'kamala-harris-fox-news-interview-ebof-digvid',
|
||||
'description': 'md5:e7dd3d1a04df916062230b60ca419a0a',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/harris-20241016234916617.jpg?c=original',
|
||||
'duration': 173.0,
|
||||
'timestamp': 1729122182,
|
||||
'upload_date': '20241016',
|
||||
'modified_timestamp': 1729194706,
|
||||
'modified_date': '20241017',
|
||||
},
|
||||
'params': {'format': 'direct'},
|
||||
}, {
|
||||
'md5': '11604ab4af83b650826753f1ccb8ecff',
|
||||
'info_dict': {
|
||||
'id': 'med04507d8ca3da827001f63d22af321ec29c7d97b',
|
||||
'ext': 'mp4',
|
||||
'title': '\'Wise\': Buttigieg on Harris\' handling of interview question about gender transition surgery',
|
||||
'display_id': 'pete-buttigieg-harris-fox-newssrc-digvid',
|
||||
'description': 'md5:602a8a7e853ed5e574acd3159428c98e',
|
||||
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/buttigieg-20241017040412074.jpg?c=original',
|
||||
'duration': 145.0,
|
||||
'timestamp': 1729137765,
|
||||
'upload_date': '20241017',
|
||||
'modified_timestamp': 1729138184,
|
||||
'modified_date': '20241017',
|
||||
},
|
||||
'params': {'format': 'direct'},
|
||||
}],
|
||||
}]
|
||||
|
||||
_CONFIG = {
|
||||
# http://edition.cnn.com/.element/apps/cvp/3.0/cfg/spider/cnn/expansion/config.xml
|
||||
'edition': {
|
||||
'data_src': 'http://edition.cnn.com/video/data/3.0/video/%s/index.xml',
|
||||
'media_src': 'http://pmd.cdn.turner.com/cnn/big',
|
||||
},
|
||||
# http://money.cnn.com/.element/apps/cvp2/cfg/config.xml
|
||||
'money': {
|
||||
'data_src': 'http://money.cnn.com/video/data/4.0/video/%s.xml',
|
||||
'media_src': 'http://ht3.cdn.turner.com/money/big',
|
||||
},
|
||||
}
|
||||
|
||||
def _extract_timestamp(self, video_data):
    """Return the timestamp for ``video_data``.

    Placeholder implementation: ``video_data`` is currently ignored and
    ``None`` is always returned.
    """
    # TODO: fix timestamp extraction
    return None
|
||||
|
||||
def _real_extract(self, url):
|
||||
sub_domain, path, page_title = self._match_valid_url(url).groups()
|
||||
if sub_domain not in ('money', 'edition'):
|
||||
sub_domain = 'edition'
|
||||
config = self._CONFIG[sub_domain]
|
||||
return self._extract_cvp_info(
|
||||
config['data_src'] % path, page_title, {
|
||||
'default': {
|
||||
'media_src': config['media_src'],
|
||||
},
|
||||
'f4m': {
|
||||
'host': 'cnn-vh.akamaihd.net',
|
||||
},
|
||||
display_id = self._match_valid_url(url).group('display_id')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
app_id = traverse_obj(
|
||||
self._search_json(r'window\.env\s*=', webpage, 'window env', display_id, default={}),
|
||||
('TOP_AUTH_SERVICE_APP_ID', {str}))
|
||||
|
||||
entries = []
|
||||
for player_data in traverse_obj(webpage, (
|
||||
{find_elements(tag='div', attr='data-component-name', value='video-player', html=True)},
|
||||
..., {extract_attributes}, all, lambda _, v: v['data-media-id'])):
|
||||
media_id = player_data['data-media-id']
|
||||
parent_uri = player_data.get('data-video-resource-parent-uri')
|
||||
formats, subtitles = [], {}
|
||||
|
||||
video_data = {}
|
||||
if parent_uri:
|
||||
video_data = self._download_json(
|
||||
'https://fave.api.cnn.io/v1/video', media_id, fatal=False,
|
||||
query={
|
||||
'id': media_id,
|
||||
'stellarUri': parent_uri,
|
||||
})
|
||||
for direct_url in traverse_obj(video_data, ('files', ..., 'fileUri', {url_or_none})):
|
||||
resolution, bitrate = None, None
|
||||
if mobj := re.search(r'-(?P<res>\d+x\d+)_(?P<tbr>\d+)k\.mp4', direct_url):
|
||||
resolution, bitrate = mobj.group('res', 'tbr')
|
||||
formats.append({
|
||||
'url': direct_url,
|
||||
'format_id': 'direct',
|
||||
'quality': 1,
|
||||
'tbr': int_or_none(bitrate),
|
||||
**parse_resolution(resolution),
|
||||
})
|
||||
for sub_data in traverse_obj(video_data, (
|
||||
'closedCaptions', 'types', lambda _, v: url_or_none(v['track']['url']), 'track')):
|
||||
subtitles.setdefault(sub_data.get('lang') or 'en', []).append({
|
||||
'url': sub_data['url'],
|
||||
'name': sub_data.get('label'),
|
||||
})
|
||||
|
||||
if app_id:
|
||||
media_data = self._download_json(
|
||||
f'https://medium.ngtv.io/v2/media/{media_id}/desktop', media_id, fatal=False,
|
||||
query={'appId': app_id})
|
||||
m3u8_url = traverse_obj(media_data, (
|
||||
'media', 'desktop', 'unprotected', 'unencrypted', 'url', {url_or_none}))
|
||||
if m3u8_url:
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
entries.append({
|
||||
**traverse_obj(player_data, {
|
||||
'title': ('data-headline', {clean_html}),
|
||||
'description': ('data-description', {clean_html}),
|
||||
'duration': ('data-duration', {parse_duration}),
|
||||
'timestamp': ('data-publish-date', {parse_iso8601}),
|
||||
'thumbnail': (
|
||||
'data-poster-image-override', {json.loads}, 'big', 'uri', {url_or_none},
|
||||
{update_url(query='c=original')}),
|
||||
'display_id': 'data-video-slug',
|
||||
}),
|
||||
**traverse_obj(video_data, {
|
||||
'timestamp': ('dateCreated', 'uts', {int_or_none(scale=1000)}),
|
||||
'description': ('description', {clean_html}),
|
||||
'title': ('headline', {str}),
|
||||
'modified_timestamp': ('lastModified', 'uts', {int_or_none(scale=1000)}),
|
||||
'duration': ('trt', {int_or_none}),
|
||||
}),
|
||||
'id': media_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
|
||||
if len(entries) == 1:
|
||||
return {
|
||||
**entries[0],
|
||||
'display_id': display_id,
|
||||
}
|
||||
|
||||
# Extractor for posts hosted on *.blogs.cnn.com; it only locates the embedded
# CNN video URL on the page and hands it over to CNNIE.
class CNNBlogsIE(InfoExtractor):
    _VALID_URL = r'https?://[^\.]+\.blogs\.cnn\.com/.+'
    _TEST = {
        'url': 'http://reliablesources.blogs.cnn.com/2014/02/09/criminalizing-journalism/',
        'md5': '3e56f97b0b6ffb4b79f4ea0749551084',
        'info_dict': {
            'id': 'bestoftv/2014/02/09/criminalizing-journalism.cnn',
            'ext': 'mp4',
            'title': 'Criminalizing journalism?',
            'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.',
            'upload_date': '20140209',
        },
        'expected_warnings': ['Failed to download m3u8 information'],
        'add_ie': ['CNN'],
    }

    def _real_extract(self, url):
        """Download the blog page and delegate to CNNIE via its ``data-url`` value."""
        # The last path component of the URL doubles as the id used for the download
        page_id = url_basename(url)
        blog_html = self._download_webpage(url, page_id)
        # The first data-url="..." attribute on the page is taken as the CNN video URL
        embedded_url = self._html_search_regex(r'data-url="(.+?)"', blog_html, 'cnn url')
        target_ie = CNNIE.ie_key()
        return self.url_result(embedded_url, target_ie)
|
||||
|
||||
|
||||
# Extractor for regular cnn.com article pages (anything that is not under
# /video/ or /videos/); it resolves the article's video and defers to CNNIE.
class CNNArticleIE(InfoExtractor):
    _VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!videos?/)'
    _TEST = {
        'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/',
        'md5': '689034c2a3d9c6dc4aa72d65a81efd01',
        'info_dict': {
            'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn',
            'ext': 'mp4',
            'title': 'Obama: Cyberattack not an act of war',
            'description': 'md5:0a802a40d2376f60e6b04c8d5bcebc4b',
            'upload_date': '20141221',
        },
        'expected_warnings': ['Failed to download m3u8 information'],
        'add_ie': ['CNN'],
    }

    def _real_extract(self, url):
        """Download the article and delegate its ``video: '...'`` path to CNNIE."""
        # The last path component of the URL doubles as the id used for the download
        page_id = url_basename(url)
        article_html = self._download_webpage(url, page_id)
        # The page script carries the video path as: video: '<path>'
        video_path = self._html_search_regex(r"video:\s*'([^']+)'", article_html, 'cnn url')
        video_url = 'http://cnn.com/video/?/video/' + video_path
        return self.url_result(video_url, CNNIE.ie_key())
|
||||
return self.playlist_result(entries, display_id)
|
||||
|
||||
|
||||
class CNNIndonesiaIE(InfoExtractor):
|
||||
|
||||
@@ -47,6 +47,7 @@ from ..utils import (
|
||||
FormatSorter,
|
||||
GeoRestrictedError,
|
||||
GeoUtils,
|
||||
ISO639Utils,
|
||||
LenientJSONDecoder,
|
||||
Popen,
|
||||
RegexNotFoundError,
|
||||
@@ -1408,6 +1409,13 @@ class InfoExtractor:
|
||||
return None, None
|
||||
|
||||
self.write_debug(f'Using netrc for {netrc_machine} authentication')
|
||||
|
||||
# compat: <=py3.10: netrc cannot parse tokens as empty strings, will return `""` instead
|
||||
# Ref: https://github.com/yt-dlp/yt-dlp/issues/11413
|
||||
# https://github.com/python/cpython/commit/15409c720be0503131713e3d3abc1acd0da07378
|
||||
if sys.version_info < (3, 11):
|
||||
return tuple(x if x != '""' else '' for x in info[::2])
|
||||
|
||||
return info[0], info[2]
|
||||
|
||||
def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None):
|
||||
@@ -1570,7 +1578,9 @@ class InfoExtractor:
|
||||
if default is not NO_DEFAULT:
|
||||
fatal = False
|
||||
for mobj in re.finditer(JSON_LD_RE, html):
|
||||
json_ld_item = self._parse_json(mobj.group('json_ld'), video_id, fatal=fatal)
|
||||
json_ld_item = self._parse_json(
|
||||
mobj.group('json_ld'), video_id, fatal=fatal,
|
||||
errnote=False if default is not NO_DEFAULT else None)
|
||||
for json_ld in variadic(json_ld_item):
|
||||
if isinstance(json_ld, dict):
|
||||
yield json_ld
|
||||
@@ -3071,7 +3081,11 @@ class InfoExtractor:
|
||||
url_pattern = stream.attrib['Url']
|
||||
stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
|
||||
stream_name = stream.get('Name')
|
||||
stream_language = stream.get('Language', 'und')
|
||||
# IsmFD expects ISO 639 Set 2 language codes (3-character length)
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/11356
|
||||
stream_language = stream.get('Language') or 'und'
|
||||
if len(stream_language) != 3:
|
||||
stream_language = ISO639Utils.short2long(stream_language) or 'und'
|
||||
for track in stream.findall('QualityLevel'):
|
||||
KNOWN_TAGS = {'255': 'AACL', '65534': 'EC-3'}
|
||||
fourcc = track.get('FourCC') or KNOWN_TAGS.get(track.get('AudioTag'))
|
||||
|
||||
@@ -12,6 +12,7 @@ from ..utils import (
|
||||
parse_iso8601,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -112,8 +113,7 @@ class CondeNastIE(InfoExtractor):
|
||||
m_paths = re.finditer(
|
||||
r'(?s)<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]', webpage)
|
||||
paths = orderedSet(m.group(1) for m in m_paths)
|
||||
build_url = lambda path: urllib.parse.urljoin(base_url, path)
|
||||
entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
|
||||
entries = [self.url_result(urljoin(base_url, path), 'CondeNast') for path in paths]
|
||||
return self.playlist_result(entries, playlist_title=title)
|
||||
|
||||
def _extract_video_params(self, webpage, display_id):
|
||||
|
||||
@@ -456,7 +456,7 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
|
||||
}),
|
||||
}),
|
||||
**traverse_obj(metadata, {
|
||||
'duration': ('duration_ms', {lambda x: float_or_none(x, 1000)}),
|
||||
'duration': ('duration_ms', {float_or_none(scale=1000)}),
|
||||
'timestamp': ('upload_date', {parse_iso8601}),
|
||||
'series': ('series_title', {str}),
|
||||
'series_id': ('series_id', {str}),
|
||||
@@ -484,7 +484,7 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
|
||||
}),
|
||||
}),
|
||||
**traverse_obj(metadata, {
|
||||
'duration': ('duration_ms', {lambda x: float_or_none(x, 1000)}),
|
||||
'duration': ('duration_ms', {float_or_none(scale=1000)}),
|
||||
'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -10,11 +10,14 @@ from ..utils import (
|
||||
OnDemandPagedList,
|
||||
age_restricted,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
update_url,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
@@ -98,12 +101,20 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
|
||||
class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
_VALID_URL = r'''(?ix)
|
||||
https?://
|
||||
(?:
|
||||
dai\.ly/|
|
||||
(?:
|
||||
(?:(?:www|touch|geo)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:(?:embed|swf|\#)/)|player(?:/\w+)?\.html\?)?video|swf)|
|
||||
(?:www\.)?lequipe\.fr/video
|
||||
(?:(?:www|touch|geo)\.)?dailymotion\.[a-z]{2,3}|
|
||||
(?:www\.)?lequipe\.fr
|
||||
)/
|
||||
(?:
|
||||
swf/(?!video)|
|
||||
(?:(?:crawler|embed|swf)/)?video/|
|
||||
player(?:/[\da-z]+)?\.html\?(?:video|(?P<is_playlist>playlist))=
|
||||
)
|
||||
[/=](?P<id>[^/?_&]+)(?:.+?\bplaylist=(?P<playlist_id>x[0-9a-z]+))?
|
||||
'''
|
||||
)
|
||||
(?P<id>[^/?_&#]+)(?:[\w-]*\?playlist=(?P<playlist_id>x[0-9a-z]+))?
|
||||
'''
|
||||
IE_NAME = 'dailymotion'
|
||||
_EMBED_REGEX = [r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1']
|
||||
_TESTS = [{
|
||||
@@ -123,7 +134,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'tags': ['hollywood', 'celeb', 'celebrity', 'movies', 'red carpet'],
|
||||
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/K456B1aXqIx58LKWQ/x1080',
|
||||
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/K456B1cmt4ZcZ9KiM/x1080',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://geo.dailymotion.com/player.html?video=x89eyek&mute=true',
|
||||
@@ -142,7 +153,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'tags': ['en_quete_d_esprit'],
|
||||
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/Tncwi1YNg_RUl7ueu/x1080',
|
||||
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/Tncwi1clTH6StrxMP/x1080',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
|
||||
@@ -217,6 +228,66 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
}, {
|
||||
'url': 'https://geo.dailymotion.com/player/xakln.html?video=x8mjju4&customConfig%5BcustomParams%5D=%2Ffr-fr%2Ftennis%2Fwimbledon-mens-singles%2Farticles-video',
|
||||
'only_matching': True,
|
||||
}, { # playlist-only
|
||||
'url': 'https://geo.dailymotion.com/player/xf7zn.html?playlist=x7wdsj',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://geo.dailymotion.com/player/xmyye.html?video=x93blhi',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.dailymotion.com/crawler/video/x8u4owg',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.dailymotion.com/embed/video/x8u4owg',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://dai.ly/x94cnnk',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
# https://geo.dailymotion.com/player/xmyye.html?video=x93blhi
|
||||
'url': 'https://www.financialounge.com/video/2024/08/01/borse-europee-in-rosso-dopo-la-fed-a-milano-volano-mediobanca-e-tim-edizione-del-1-agosto/',
|
||||
'info_dict': {
|
||||
'id': 'x93blhi',
|
||||
'ext': 'mp4',
|
||||
'title': 'OnAir - 01/08/24',
|
||||
'description': '',
|
||||
'duration': 217,
|
||||
'timestamp': 1722505658,
|
||||
'upload_date': '20240801',
|
||||
'uploader': 'Financialounge',
|
||||
'uploader_id': 'x2vtgmm',
|
||||
'age_limit': 0,
|
||||
'tags': [],
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
}, {
|
||||
# https://geo.dailymotion.com/player/xf7zn.html?playlist=x7wdsj
|
||||
'url': 'https://www.cycleworld.com/blogs/ask-kevin/ducati-continues-to-evolve-with-v4/',
|
||||
'info_dict': {
|
||||
'id': 'x7wdsj',
|
||||
},
|
||||
'playlist_mincount': 50,
|
||||
}, {
|
||||
# https://www.dailymotion.com/crawler/video/x8u4owg
|
||||
'url': 'https://www.leparisien.fr/environnement/video-le-veloto-la-voiture-a-pedales-qui-aimerait-se-faire-une-place-sur-les-routes-09-03-2024-KCYMCPM4WFHJXMSKBUI66UNFPU.php',
|
||||
'info_dict': {
|
||||
'id': 'x8u4owg',
|
||||
'ext': 'mp4',
|
||||
'like_count': int,
|
||||
'uploader': 'Le Parisien',
|
||||
'thumbnail': 'https://www.leparisien.fr/resizer/ho_GwveeYftNkLwg_cEta--5Bv4=/1200x675/cloudfront-eu-central-1.images.arcpublishing.com/leparisien/BFXJNEBN75EUNHGYJLORUC3TX4.jpg',
|
||||
'upload_date': '20240309',
|
||||
'view_count': int,
|
||||
'timestamp': 1709997866,
|
||||
'age_limit': 0,
|
||||
'uploader_id': 'x32f7b',
|
||||
'title': 'VIDÉO. Le «\xa0véloto\xa0», la voiture à pédales qui aimerait se faire une place sur les routes',
|
||||
'duration': 428.0,
|
||||
'description': 'À bord du « véloto », l’alternative à la voiture pour la campagne',
|
||||
'tags': ['biclou', 'vélo', 'véloto', 'campagne', 'voiture', 'environnement', 'véhicules intermédiaires'],
|
||||
},
|
||||
}]
|
||||
_GEO_BYPASS = False
|
||||
_COMMON_MEDIA_FIELDS = '''description
|
||||
@@ -232,16 +303,35 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
for mobj in re.finditer(
|
||||
r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P<id>[0-9a-zA-Z]+).+?}\s*\);', webpage):
|
||||
yield from 'https://www.dailymotion.com/embed/video/' + mobj.group('id')
|
||||
for mobj in re.finditer(
|
||||
r'(?s)<script [^>]*\bsrc=(["\'])(?:https?:)?//[\w-]+\.dailymotion\.com/player/(?:(?!\1).)+\1[^>]*>', webpage):
|
||||
attrs = extract_attributes(mobj.group(0))
|
||||
player_url = url_or_none(attrs.get('src'))
|
||||
if not player_url:
|
||||
continue
|
||||
player_url = player_url.replace('.js', '.html')
|
||||
if player_url.startswith('//'):
|
||||
player_url = f'https:{player_url}'
|
||||
if video_id := attrs.get('data-video'):
|
||||
query_string = f'video={video_id}'
|
||||
elif playlist_id := attrs.get('data-playlist'):
|
||||
query_string = f'playlist={playlist_id}'
|
||||
else:
|
||||
continue
|
||||
yield update_url(player_url, query=query_string)
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url)
|
||||
video_id, playlist_id = self._match_valid_url(url).groups()
|
||||
video_id, is_playlist, playlist_id = self._match_valid_url(url).group('id', 'is_playlist', 'playlist_id')
|
||||
|
||||
if playlist_id:
|
||||
if self._yes_playlist(playlist_id, video_id):
|
||||
return self.url_result(
|
||||
'http://www.dailymotion.com/playlist/' + playlist_id,
|
||||
'DailymotionPlaylist', playlist_id)
|
||||
if is_playlist: # We matched the playlist query param as video_id
|
||||
playlist_id = video_id
|
||||
video_id = None
|
||||
|
||||
if self._yes_playlist(playlist_id, video_id):
|
||||
return self.url_result(
|
||||
f'http://www.dailymotion.com/playlist/{playlist_id}',
|
||||
'DailymotionPlaylist', playlist_id)
|
||||
|
||||
password = self.get_param('videopassword')
|
||||
media = self._call_api(
|
||||
@@ -282,6 +372,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
title = metadata['title']
|
||||
is_live = media.get('isOnAir')
|
||||
formats = []
|
||||
subtitles = {}
|
||||
|
||||
for quality, media_list in metadata['qualities'].items():
|
||||
for m in media_list:
|
||||
media_url = m.get('url')
|
||||
@@ -289,8 +381,10 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
if not media_url or media_type == 'application/vnd.lumberjack.manifest':
|
||||
continue
|
||||
if media_type == 'application/x-mpegURL':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False))
|
||||
fmt, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
media_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False)
|
||||
formats.extend(fmt)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
else:
|
||||
f = {
|
||||
'url': media_url,
|
||||
@@ -310,20 +404,18 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
if not f.get('fps') and f['format_id'].endswith('@60'):
|
||||
f['fps'] = 60
|
||||
|
||||
subtitles = {}
|
||||
subtitles_data = try_get(metadata, lambda x: x['subtitles']['data'], dict) or {}
|
||||
for subtitle_lang, subtitle in subtitles_data.items():
|
||||
subtitles[subtitle_lang] = [{
|
||||
'url': subtitle_url,
|
||||
} for subtitle_url in subtitle.get('urls', [])]
|
||||
|
||||
thumbnails = []
|
||||
for height, poster_url in metadata.get('posters', {}).items():
|
||||
thumbnails.append({
|
||||
'height': int_or_none(height),
|
||||
'id': height,
|
||||
'url': poster_url,
|
||||
})
|
||||
thumbnails = traverse_obj(metadata, (
|
||||
('posters', 'thumbnails'), {dict.items}, lambda _, v: url_or_none(v[1]), {
|
||||
'height': (0, {int_or_none}),
|
||||
'id': (0, {str}),
|
||||
'url': 1,
|
||||
}))
|
||||
|
||||
owner = metadata.get('owner') or {}
|
||||
stats = media.get('stats') or {}
|
||||
@@ -447,7 +539,7 @@ class DailymotionSearchIE(DailymotionPlaylistBaseIE):
|
||||
|
||||
class DailymotionUserIE(DailymotionPlaylistBaseIE):
|
||||
IE_NAME = 'dailymotion:user'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist|search)/)(?:(?:old/)?user/)?(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist|search|crawler)/)(?:(?:old/)?user/)?(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dailymotion.com/user/nqtv',
|
||||
'info_dict': {
|
||||
|
||||
@@ -40,7 +40,7 @@ class DangalPlayBaseIE(InfoExtractor):
|
||||
'id': ('content_id', {str}),
|
||||
'title': ('display_title', {str}),
|
||||
'episode': ('title', {str}),
|
||||
'series': ('show_name', {str}, {lambda x: x or None}),
|
||||
'series': ('show_name', {str}, filter),
|
||||
'series_id': ('catalog_id', {str}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'release_timestamp': ('release_date_uts', {int_or_none}),
|
||||
|
||||
@@ -207,7 +207,7 @@ class ERRJupiterIE(InfoExtractor):
|
||||
**traverse_obj(data, {
|
||||
'title': ('heading', {str}),
|
||||
'alt_title': ('subHeading', {str}),
|
||||
'description': (('lead', 'body'), {clean_html}, {lambda x: x or None}),
|
||||
'description': (('lead', 'body'), {clean_html}, filter),
|
||||
'timestamp': ('created', {int_or_none}),
|
||||
'modified_timestamp': ('updated', {int_or_none}),
|
||||
'release_timestamp': (('scheduleStart', 'publicStart'), {int_or_none}),
|
||||
|
||||
@@ -564,11 +564,12 @@ class FacebookIE(InfoExtractor):
|
||||
js_data, lambda x: x['jsmods']['instances'], list) or [])
|
||||
|
||||
def extract_dash_manifest(video, formats):
|
||||
dash_manifest = traverse_obj(video, 'dash_manifest', 'playlist', expected_type=str)
|
||||
dash_manifest = traverse_obj(
|
||||
video, 'dash_manifest', 'playlist', 'dash_manifest_xml_string', expected_type=str)
|
||||
if dash_manifest:
|
||||
formats.extend(self._parse_mpd_formats(
|
||||
compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
|
||||
mpd_url=video.get('dash_manifest_url')))
|
||||
mpd_url=url_or_none(video.get('dash_manifest_url'))))
|
||||
|
||||
def process_formats(info):
|
||||
# Downloads with browser's User-Agent are rate limited. Working around
|
||||
@@ -618,12 +619,13 @@ class FacebookIE(InfoExtractor):
|
||||
video = video['creation_story']
|
||||
video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner'))
|
||||
video.update(reel_info)
|
||||
fmt_data = traverse_obj(video, ('videoDeliveryLegacyFields', {dict})) or video
|
||||
formats = []
|
||||
q = qualities(['sd', 'hd'])
|
||||
for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'),
|
||||
('playable_url_dash', ''), ('browser_native_hd_url', 'hd'),
|
||||
('browser_native_sd_url', 'sd')):
|
||||
playable_url = video.get(key)
|
||||
playable_url = fmt_data.get(key)
|
||||
if not playable_url:
|
||||
continue
|
||||
if determine_ext(playable_url) == 'mpd':
|
||||
@@ -635,7 +637,7 @@ class FacebookIE(InfoExtractor):
|
||||
'quality': q(format_id) - 3,
|
||||
'url': playable_url,
|
||||
})
|
||||
extract_dash_manifest(video, formats)
|
||||
extract_dash_manifest(fmt_data, formats)
|
||||
if not formats:
|
||||
# Do not append false positive entry w/o any formats
|
||||
return
|
||||
|
||||
@@ -8,6 +8,8 @@ from .common import InfoExtractor
|
||||
from .commonprotocols import RtmpIE
|
||||
from .youtube import YoutubeIE
|
||||
from ..compat import compat_etree_fromstring
|
||||
from ..cookies import LenientSimpleCookie
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..networking.impersonate import ImpersonateTarget
|
||||
from ..utils import (
|
||||
KNOWN_EXTENSIONS,
|
||||
@@ -2374,10 +2376,9 @@ class GenericIE(InfoExtractor):
|
||||
else:
|
||||
video_id = self._generic_id(url)
|
||||
|
||||
# Try to impersonate a web-browser by default if possible
|
||||
# Skip impersonation if not available to omit the warning
|
||||
impersonate = self._configuration_arg('impersonate', [''])
|
||||
if 'false' in impersonate or not self._downloader._impersonate_target_available(ImpersonateTarget()):
|
||||
# Do not impersonate by default; see https://github.com/yt-dlp/yt-dlp/issues/11335
|
||||
impersonate = self._configuration_arg('impersonate', ['false'])
|
||||
if 'false' in impersonate:
|
||||
impersonate = None
|
||||
|
||||
# Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
|
||||
@@ -2388,10 +2389,29 @@ class GenericIE(InfoExtractor):
|
||||
# to accept raw bytes and being able to download only a chunk.
|
||||
# It may probably better to solve this by checking Content-Type for application/octet-stream
|
||||
# after a HEAD request, but not sure if we can rely on this.
|
||||
full_response = self._request_webpage(url, video_id, headers=filter_dict({
|
||||
'Accept-Encoding': 'identity',
|
||||
'Referer': smuggled_data.get('referer'),
|
||||
}), impersonate=impersonate)
|
||||
try:
|
||||
full_response = self._request_webpage(url, video_id, headers=filter_dict({
|
||||
'Accept-Encoding': 'identity',
|
||||
'Referer': smuggled_data.get('referer'),
|
||||
}), impersonate=impersonate)
|
||||
except ExtractorError as e:
|
||||
if not (isinstance(e.cause, HTTPError) and e.cause.status == 403
|
||||
and e.cause.response.get_header('cf-mitigated') == 'challenge'
|
||||
and e.cause.response.extensions.get('impersonate') is None):
|
||||
raise
|
||||
cf_cookie_domain = traverse_obj(
|
||||
LenientSimpleCookie(e.cause.response.get_header('set-cookie')),
|
||||
('__cf_bm', 'domain'))
|
||||
if cf_cookie_domain:
|
||||
self.write_debug(f'Clearing __cf_bm cookie for {cf_cookie_domain}')
|
||||
self.cookiejar.clear(domain=cf_cookie_domain, path='/', name='__cf_bm')
|
||||
msg = 'Got HTTP Error 403 caused by Cloudflare anti-bot challenge; '
|
||||
if not self._downloader._impersonate_target_available(ImpersonateTarget()):
|
||||
msg += ('see https://github.com/yt-dlp/yt-dlp#impersonation for '
|
||||
'how to install the required impersonation dependency, and ')
|
||||
raise ExtractorError(
|
||||
f'{msg}try again with --extractor-args "generic:impersonate"', expected=True)
|
||||
|
||||
new_url = full_response.url
|
||||
if new_url != extract_basic_auth(url)[0]:
|
||||
self.report_following_redirect(new_url)
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -63,7 +62,7 @@ class IlPostIE(InfoExtractor):
|
||||
'url': ('podcast_raw_url', {url_or_none}),
|
||||
'thumbnail': ('image', {url_or_none}),
|
||||
'timestamp': ('timestamp', {int_or_none}),
|
||||
'duration': ('milliseconds', {functools.partial(float_or_none, scale=1000)}),
|
||||
'duration': ('milliseconds', {float_or_none(scale=1000)}),
|
||||
'availability': ('free', {lambda v: 'public' if v else 'subscriber_only'}),
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -326,11 +326,11 @@ class JioCinemaIE(JioCinemaBaseIE):
|
||||
# fallback metadata
|
||||
'title': ('name', {str}),
|
||||
'description': ('fullSynopsis', {str}),
|
||||
'series': ('show', 'name', {str}, {lambda x: x or None}),
|
||||
'series': ('show', 'name', {str}, filter),
|
||||
'season': ('tournamentName', {str}, {lambda x: x if x != 'Season 0' else None}),
|
||||
'season_number': ('episode', 'season', {int_or_none}, {lambda x: x or None}),
|
||||
'season_number': ('episode', 'season', {int_or_none}, filter),
|
||||
'episode': ('fullTitle', {str}),
|
||||
'episode_number': ('episode', 'episodeNo', {int_or_none}, {lambda x: x or None}),
|
||||
'episode_number': ('episode', 'episodeNo', {int_or_none}, filter),
|
||||
'age_limit': ('ageNemonic', {parse_age_limit}),
|
||||
'duration': ('totalDuration', {float_or_none}),
|
||||
'thumbnail': ('images', {url_or_none}),
|
||||
@@ -338,10 +338,10 @@ class JioCinemaIE(JioCinemaBaseIE):
|
||||
**traverse_obj(metadata, ('result', 0, {
|
||||
'title': ('fullTitle', {str}),
|
||||
'description': ('fullSynopsis', {str}),
|
||||
'series': ('showName', {str}, {lambda x: x or None}),
|
||||
'season': ('seasonName', {str}, {lambda x: x or None}),
|
||||
'series': ('showName', {str}, filter),
|
||||
'season': ('seasonName', {str}, filter),
|
||||
'season_number': ('season', {int_or_none}),
|
||||
'season_id': ('seasonId', {str}, {lambda x: x or None}),
|
||||
'season_id': ('seasonId', {str}, filter),
|
||||
'episode': ('fullTitle', {str}),
|
||||
'episode_number': ('episode', {int_or_none}),
|
||||
'timestamp': ('uploadTime', {int_or_none}),
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
@@ -137,7 +136,7 @@ class KickVODIE(KickBaseIE):
|
||||
'uploader': ('livestream', 'channel', 'user', 'username', {str}),
|
||||
'uploader_id': ('livestream', 'channel', 'user_id', {int}, {str_or_none}),
|
||||
'timestamp': ('created_at', {parse_iso8601}),
|
||||
'duration': ('livestream', 'duration', {functools.partial(float_or_none, scale=1000)}),
|
||||
'duration': ('livestream', 'duration', {float_or_none(scale=1000)}),
|
||||
'thumbnail': ('livestream', 'thumbnail', {url_or_none}),
|
||||
'categories': ('livestream', 'categories', ..., 'name', {str}),
|
||||
'view_count': ('views', {int_or_none}),
|
||||
|
||||
@@ -119,7 +119,7 @@ class KikaIE(InfoExtractor):
|
||||
'width': ('frameWidth', {int_or_none}),
|
||||
'height': ('frameHeight', {int_or_none}),
|
||||
# NB: filesize is 0 if unknown, bitrate is -1 if unknown
|
||||
'filesize': ('fileSize', {int_or_none}, {lambda x: x or None}),
|
||||
'filesize': ('fileSize', {int_or_none}, filter),
|
||||
'abr': ('bitrateAudio', {int_or_none}, {lambda x: None if x == -1 else x}),
|
||||
'vbr': ('bitrateVideo', {int_or_none}, {lambda x: None if x == -1 else x}),
|
||||
}),
|
||||
|
||||
@@ -32,7 +32,7 @@ class LaracastsBaseIE(InfoExtractor):
|
||||
VimeoIE, url_transparent=True,
|
||||
**traverse_obj(episode, {
|
||||
'id': ('id', {int}, {str_or_none}),
|
||||
'webpage_url': ('path', {lambda x: urljoin('https://laracasts.com', x)}),
|
||||
'webpage_url': ('path', {urljoin('https://laracasts.com')}),
|
||||
'title': ('title', {clean_html}),
|
||||
'season_number': ('chapter', {int_or_none}),
|
||||
'episode_number': ('position', {int_or_none}),
|
||||
@@ -104,7 +104,7 @@ class LaracastsPlaylistIE(LaracastsBaseIE):
|
||||
'description': ('body', {clean_html}),
|
||||
'thumbnail': (('large_thumbnail', 'thumbnail'), {url_or_none}, any),
|
||||
'duration': ('runTime', {parse_duration}),
|
||||
'categories': ('taxonomy', 'name', {str}, {lambda x: x and [x]}),
|
||||
'categories': ('taxonomy', 'name', {str}, all, filter),
|
||||
'tags': ('topics', ..., 'name', {str}),
|
||||
'modified_date': ('lastUpdated', {unified_strdate}),
|
||||
}),
|
||||
|
||||
@@ -66,7 +66,7 @@ class LBRYBaseIE(InfoExtractor):
|
||||
'license': ('value', 'license', {str}),
|
||||
'timestamp': ('timestamp', {int_or_none}),
|
||||
'release_timestamp': ('value', 'release_time', {int_or_none}),
|
||||
'tags': ('value', 'tags', ..., {lambda x: x or None}),
|
||||
'tags': ('value', 'tags', ..., filter),
|
||||
'duration': ('value', stream_type, 'duration', {int_or_none}),
|
||||
'channel': ('signing_channel', 'value', 'title', {str}),
|
||||
'channel_id': ('signing_channel', 'claim_id', {str}),
|
||||
@@ -136,6 +136,7 @@ class LBRYBaseIE(InfoExtractor):
|
||||
|
||||
class LBRYIE(LBRYBaseIE):
|
||||
IE_NAME = 'lbry'
|
||||
IE_DESC = 'odysee.com'
|
||||
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + rf'''
|
||||
(?:\$/(?:download|embed)/)?
|
||||
(?P<id>
|
||||
@@ -364,6 +365,7 @@ class LBRYIE(LBRYBaseIE):
|
||||
|
||||
class LBRYChannelIE(LBRYBaseIE):
|
||||
IE_NAME = 'lbry:channel'
|
||||
IE_DESC = 'odysee.com channels'
|
||||
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + rf'(?P<id>@{LBRYBaseIE._OPT_CLAIM_ID})/?(?:[?&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://lbry.tv/@LBRYFoundation:0',
|
||||
@@ -391,6 +393,7 @@ class LBRYChannelIE(LBRYBaseIE):
|
||||
|
||||
class LBRYPlaylistIE(LBRYBaseIE):
|
||||
IE_NAME = 'lbry:playlist'
|
||||
IE_DESC = 'odysee.com playlists'
|
||||
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'\$/(?:play)?list/(?P<id>[0-9a-f-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://odysee.com/$/playlist/ffef782f27486f0ac138bde8777f72ebdd0548c2',
|
||||
|
||||
@@ -6,13 +6,11 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_element_by_class,
|
||||
get_element_html_by_id,
|
||||
join_nonempty,
|
||||
parse_duration,
|
||||
unified_timestamp,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class LearningOnScreenIE(InfoExtractor):
|
||||
@@ -32,28 +30,24 @@ class LearningOnScreenIE(InfoExtractor):
|
||||
|
||||
def _real_initialize(self):
|
||||
if not self._get_cookies('https://learningonscreen.ac.uk/').get('PHPSESSID-BOB-LIVE'):
|
||||
self.raise_login_required(
|
||||
'Use --cookies for authentication. See '
|
||||
' https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp '
|
||||
'for how to manually pass cookies', method=None)
|
||||
self.raise_login_required(method='session_cookies')
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
details = traverse_obj(webpage, (
|
||||
{functools.partial(get_element_html_by_id, 'programme-details')}, {
|
||||
'title': ({functools.partial(re.search, r'<h2>([^<]+)</h2>')}, 1, {clean_html}),
|
||||
{find_element(id='programme-details', html=True)}, {
|
||||
'title': ({find_element(tag='h2')}, {clean_html}),
|
||||
'timestamp': (
|
||||
{functools.partial(get_element_by_class, 'broadcast-date')},
|
||||
{find_element(cls='broadcast-date')},
|
||||
{functools.partial(re.match, r'([^<]+)')}, 1, {unified_timestamp}),
|
||||
'duration': (
|
||||
{functools.partial(get_element_by_class, 'prog-running-time')},
|
||||
{clean_html}, {parse_duration}),
|
||||
{find_element(cls='prog-running-time')}, {clean_html}, {parse_duration}),
|
||||
}))
|
||||
|
||||
title = details.pop('title', None) or traverse_obj(webpage, (
|
||||
{functools.partial(get_element_html_by_id, 'add-to-existing-playlist')},
|
||||
{find_element(id='add-to-existing-playlist', html=True)},
|
||||
{extract_attributes}, 'data-record-title', {clean_html}))
|
||||
|
||||
entries = self._parse_html5_media_entries(
|
||||
|
||||
@@ -6,12 +6,10 @@ from ..utils import (
|
||||
extract_attributes,
|
||||
get_element_by_class,
|
||||
get_element_html_by_id,
|
||||
get_element_text_and_html_by_tag,
|
||||
parse_duration,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
)
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class ListenNotesIE(InfoExtractor):
|
||||
@@ -22,14 +20,14 @@ class ListenNotesIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'KrDgvNb_u1n',
|
||||
'ext': 'mp3',
|
||||
'title': 'md5:32236591a921adf17bbdbf0441b6c0e9',
|
||||
'description': 'md5:c581ed197eeddcee55a67cdb547c8cbd',
|
||||
'duration': 2148.0,
|
||||
'channel': 'Thriving on Overload',
|
||||
'title': r're:Tim O’Reilly on noticing things other people .{113}',
|
||||
'description': r're:(?s)‘’We shape reality by what we notice and .{27459}',
|
||||
'duration': 2215.0,
|
||||
'channel': 'Amplifying Cognition',
|
||||
'channel_id': 'ed84wITivxF',
|
||||
'episode_id': 'e1312583fa7b4e24acfbb5131050be00',
|
||||
'thumbnail': 'https://production.listennotes.com/podcasts/thriving-on-overload-ross-dawson-1wb_KospA3P-ed84wITivxF.300x300.jpg',
|
||||
'channel_url': 'https://www.listennotes.com/podcasts/thriving-on-overload-ross-dawson-ed84wITivxF/',
|
||||
'thumbnail': 'https://cdn-images-3.listennotes.com/podcasts/amplifying-cognition-ross-dawson-Iemft4Gdr0k-ed84wITivxF.300x300.jpg',
|
||||
'channel_url': 'https://www.listennotes.com/podcasts/amplifying-cognition-ross-dawson-ed84wITivxF/',
|
||||
'cast': ['Tim O’Reilly', 'Cookie Monster', 'Lao Tzu', 'Wallace Steven', 'Eric Raymond', 'Christine Peterson', 'John Maynard Keyne', 'Ross Dawson'],
|
||||
},
|
||||
}, {
|
||||
@@ -39,13 +37,13 @@ class ListenNotesIE(InfoExtractor):
|
||||
'id': 'lwEA3154JzG',
|
||||
'ext': 'mp3',
|
||||
'title': 'Episode 177: WireGuard with Jason Donenfeld',
|
||||
'description': 'md5:24744f36456a3e95f83c1193a3458594',
|
||||
'description': r're:(?s)Jason Donenfeld lead developer joins us this hour to discuss WireGuard, .{3169}',
|
||||
'duration': 3861.0,
|
||||
'channel': 'Ask Noah Show',
|
||||
'channel_id': '4DQTzdS5-j7',
|
||||
'episode_id': '8c8954b95e0b4859ad1eecec8bf6d3a4',
|
||||
'channel_url': 'https://www.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-4DQTzdS5-j7/',
|
||||
'thumbnail': 'https://production.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-cfbRUw9Gs3F-4DQTzdS5-j7.300x300.jpg',
|
||||
'thumbnail': 'https://cdn-images-3.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-gD7vG150cxf-4DQTzdS5-j7.300x300.jpg',
|
||||
'cast': ['noah showlink', 'noah show', 'noah dashboard', 'jason donenfeld'],
|
||||
},
|
||||
}]
|
||||
@@ -70,7 +68,7 @@ class ListenNotesIE(InfoExtractor):
|
||||
'id': audio_id,
|
||||
'url': data['audio'],
|
||||
'title': (data.get('data-title')
|
||||
or try_call(lambda: get_element_text_and_html_by_tag('h1', webpage)[0])
|
||||
or traverse_obj(webpage, ({find_element(tag='h1')}, {clean_html}))
|
||||
or self._html_search_meta(('og:title', 'title', 'twitter:title'), webpage, 'title')),
|
||||
'description': (self._clean_description(get_element_by_class('ln-text-p', webpage))
|
||||
or strip_or_none(description)),
|
||||
|
||||
@@ -114,7 +114,7 @@ class LSMLREmbedIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
query = parse_qs(url)
|
||||
video_id = traverse_obj(query, (
|
||||
('show', 'id'), 0, {int_or_none}, {lambda x: x or None}, {str_or_none}), get_all=False)
|
||||
('show', 'id'), 0, {int_or_none}, filter, {str_or_none}), get_all=False)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
player_data, media_data = self._search_regex(
|
||||
|
||||
@@ -57,6 +57,6 @@ class MagentaMusikIE(InfoExtractor):
|
||||
'duration': ('runtimeInSeconds', {int_or_none}),
|
||||
'location': ('countriesOfProduction', {list}, {lambda x: join_nonempty(*x, delim=', ')}),
|
||||
'release_year': ('yearOfProduction', {int_or_none}),
|
||||
'categories': ('mainGenre', {str}, {lambda x: x and [x]}),
|
||||
'categories': ('mainGenre', {str}, all, filter),
|
||||
})),
|
||||
}
|
||||
|
||||
@@ -17,7 +17,7 @@ class MediaStreamBaseIE(InfoExtractor):
|
||||
_BASE_URL_RE = r'https?://mdstrm\.com/(?:embed|live-stream)'
|
||||
|
||||
def _extract_mediastream_urls(self, webpage):
|
||||
yield from traverse_obj(list(self._yield_json_ld(webpage, None, fatal=False)), (
|
||||
yield from traverse_obj(list(self._yield_json_ld(webpage, None, default={})), (
|
||||
lambda _, v: v['@type'] == 'VideoObject', ('embedUrl', 'contentUrl'),
|
||||
{lambda x: x if re.match(rf'{self._BASE_URL_RE}/\w+', x) else None}))
|
||||
|
||||
|
||||
@@ -66,7 +66,7 @@ class MixchIE(InfoExtractor):
|
||||
note='Downloading comments', errnote='Failed to download comments'), (..., {
|
||||
'author': ('name', {str}),
|
||||
'author_id': ('user_id', {str_or_none}),
|
||||
'id': ('message_id', {str}, {lambda x: x or None}),
|
||||
'id': ('message_id', {str}, filter),
|
||||
'text': ('body', {str}),
|
||||
'timestamp': ('created', {int}),
|
||||
}))
|
||||
|
||||
@@ -4,15 +4,11 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_element_by_class,
|
||||
get_element_html_by_class,
|
||||
get_element_text_and_html_by_tag,
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
unified_strdate,
|
||||
)
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class MonstercatIE(InfoExtractor):
|
||||
@@ -26,19 +22,21 @@ class MonstercatIE(InfoExtractor):
|
||||
'thumbnail': 'https://www.monstercat.com/release/742779548009/cover',
|
||||
'release_date': '20230711',
|
||||
'album': 'The Secret Language of Trees',
|
||||
'album_artist': 'BT',
|
||||
'album_artists': ['BT'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _extract_tracks(self, table, album_meta):
|
||||
for td in re.findall(r'<tr[^<]*>((?:(?!</tr>)[\w\W])+)', table): # regex by chatgpt due to lack of get_elements_by_tag
|
||||
title = clean_html(try_call(
|
||||
lambda: get_element_by_class('d-inline-flex flex-column', td).partition(' <span')[0]))
|
||||
ids = extract_attributes(try_call(lambda: get_element_html_by_class('btn-play cursor-pointer mr-small', td)) or '')
|
||||
title = traverse_obj(td, (
|
||||
{find_element(cls='d-inline-flex flex-column')},
|
||||
{lambda x: x.partition(' <span')}, 0, {clean_html}))
|
||||
ids = traverse_obj(td, (
|
||||
{find_element(cls='btn-play cursor-pointer mr-small', html=True)}, {extract_attributes})) or {}
|
||||
track_id = ids.get('data-track-id')
|
||||
release_id = ids.get('data-release-id')
|
||||
|
||||
track_number = int_or_none(try_call(lambda: get_element_by_class('py-xsmall', td)))
|
||||
track_number = traverse_obj(td, ({find_element(cls='py-xsmall')}, {int_or_none}))
|
||||
if not track_id or not release_id:
|
||||
self.report_warning(f'Skipping track {track_number}, ID(s) not found')
|
||||
self.write_debug(f'release_id={release_id!r} track_id={track_id!r}')
|
||||
@@ -48,7 +46,7 @@ class MonstercatIE(InfoExtractor):
|
||||
'title': title,
|
||||
'track': title,
|
||||
'track_number': track_number,
|
||||
'artist': clean_html(try_call(lambda: get_element_by_class('d-block fs-xxsmall', td))),
|
||||
'artists': traverse_obj(td, ({find_element(cls='d-block fs-xxsmall')}, {clean_html}, all)),
|
||||
'url': f'https://www.monstercat.com/api/release/{release_id}/track-stream/{track_id}',
|
||||
'id': track_id,
|
||||
'ext': 'mp3',
|
||||
@@ -57,20 +55,19 @@ class MonstercatIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
url_id = self._match_id(url)
|
||||
html = self._download_webpage(url, url_id)
|
||||
# wrap all `get_elements` in `try_call`, HTMLParser has problems with site's html
|
||||
tracklist_table = try_call(lambda: get_element_by_class('table table-small', html)) or ''
|
||||
|
||||
title = try_call(lambda: get_element_text_and_html_by_tag('h1', html)[0])
|
||||
date = traverse_obj(html, ({lambda html: get_element_by_class('font-italic mb-medium d-tablet-none d-phone-block',
|
||||
html).partition('Released ')}, 2, {strip_or_none}, {unified_strdate}))
|
||||
# NB: HTMLParser may choke on this html; use {find_element} or try_call(lambda: get_element...)
|
||||
tracklist_table = traverse_obj(html, {find_element(cls='table table-small')}) or ''
|
||||
title = traverse_obj(html, ({find_element(tag='h1')}, {clean_html}))
|
||||
|
||||
album_meta = {
|
||||
'title': title,
|
||||
'album': title,
|
||||
'thumbnail': f'https://www.monstercat.com/release/{url_id}/cover',
|
||||
'album_artist': try_call(
|
||||
lambda: get_element_by_class('h-normal text-uppercase mb-desktop-medium mb-smallish', html)),
|
||||
'release_date': date,
|
||||
'album_artists': traverse_obj(html, (
|
||||
{find_element(cls='h-normal text-uppercase mb-desktop-medium mb-smallish')}, {clean_html}, all)),
|
||||
'release_date': traverse_obj(html, (
|
||||
{find_element(cls='font-italic mb-medium d-tablet-none d-phone-block')},
|
||||
{lambda x: x.partition('Released ')}, 2, {strip_or_none}, {unified_strdate})),
|
||||
}
|
||||
|
||||
return self.playlist_result(
|
||||
|
||||
@@ -86,7 +86,7 @@ class NebulaBaseIE(InfoExtractor):
|
||||
|
||||
def _extract_video_metadata(self, episode):
|
||||
channel_url = traverse_obj(
|
||||
episode, (('channel_slug', 'class_slug'), {lambda x: urljoin('https://nebula.tv/', x)}), get_all=False)
|
||||
episode, (('channel_slug', 'class_slug'), {urljoin('https://nebula.tv/')}), get_all=False)
|
||||
return {
|
||||
'id': episode['id'].partition(':')[2],
|
||||
**traverse_obj(episode, {
|
||||
|
||||
@@ -6,12 +6,10 @@ from ..utils import (
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
get_element_by_class,
|
||||
get_element_text_and_html_by_tag,
|
||||
parse_duration,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class NekoHackerIE(InfoExtractor):
|
||||
@@ -35,7 +33,7 @@ class NekoHackerIE(InfoExtractor):
|
||||
'acodec': 'mp3',
|
||||
'release_date': '20221101',
|
||||
'album': 'Nekoverse',
|
||||
'artist': 'Neko Hacker',
|
||||
'artists': ['Neko Hacker'],
|
||||
'track': 'Spaceship',
|
||||
'track_number': 1,
|
||||
'duration': 195.0,
|
||||
@@ -53,7 +51,7 @@ class NekoHackerIE(InfoExtractor):
|
||||
'acodec': 'mp3',
|
||||
'release_date': '20221101',
|
||||
'album': 'Nekoverse',
|
||||
'artist': 'Neko Hacker',
|
||||
'artists': ['Neko Hacker'],
|
||||
'track': 'City Runner',
|
||||
'track_number': 2,
|
||||
'duration': 148.0,
|
||||
@@ -71,7 +69,7 @@ class NekoHackerIE(InfoExtractor):
|
||||
'acodec': 'mp3',
|
||||
'release_date': '20221101',
|
||||
'album': 'Nekoverse',
|
||||
'artist': 'Neko Hacker',
|
||||
'artists': ['Neko Hacker'],
|
||||
'track': 'Nature Talk',
|
||||
'track_number': 3,
|
||||
'duration': 174.0,
|
||||
@@ -89,7 +87,7 @@ class NekoHackerIE(InfoExtractor):
|
||||
'acodec': 'mp3',
|
||||
'release_date': '20221101',
|
||||
'album': 'Nekoverse',
|
||||
'artist': 'Neko Hacker',
|
||||
'artists': ['Neko Hacker'],
|
||||
'track': 'Crystal World',
|
||||
'track_number': 4,
|
||||
'duration': 199.0,
|
||||
@@ -115,7 +113,7 @@ class NekoHackerIE(InfoExtractor):
|
||||
'acodec': 'mp3',
|
||||
'release_date': '20210115',
|
||||
'album': '進め!むじなカンパニー',
|
||||
'artist': 'Neko Hacker',
|
||||
'artists': ['Neko Hacker'],
|
||||
'track': 'md5:1a5fcbc96ca3c3265b1c6f9f79f30fd0',
|
||||
'track_number': 1,
|
||||
},
|
||||
@@ -132,7 +130,7 @@ class NekoHackerIE(InfoExtractor):
|
||||
'acodec': 'mp3',
|
||||
'release_date': '20210115',
|
||||
'album': '進め!むじなカンパニー',
|
||||
'artist': 'Neko Hacker',
|
||||
'artists': ['Neko Hacker'],
|
||||
'track': 'むじな de なじむ feat. 六科なじむ (CV: 日高里菜 )',
|
||||
'track_number': 2,
|
||||
},
|
||||
@@ -149,7 +147,7 @@ class NekoHackerIE(InfoExtractor):
|
||||
'acodec': 'mp3',
|
||||
'release_date': '20210115',
|
||||
'album': '進め!むじなカンパニー',
|
||||
'artist': 'Neko Hacker',
|
||||
'artists': ['Neko Hacker'],
|
||||
'track': '進め!むじなカンパニー (instrumental)',
|
||||
'track_number': 3,
|
||||
},
|
||||
@@ -166,7 +164,7 @@ class NekoHackerIE(InfoExtractor):
|
||||
'acodec': 'mp3',
|
||||
'release_date': '20210115',
|
||||
'album': '進め!むじなカンパニー',
|
||||
'artist': 'Neko Hacker',
|
||||
'artists': ['Neko Hacker'],
|
||||
'track': 'むじな de なじむ (instrumental)',
|
||||
'track_number': 4,
|
||||
},
|
||||
@@ -181,14 +179,17 @@ class NekoHackerIE(InfoExtractor):
|
||||
playlist = get_element_by_class('playlist', webpage)
|
||||
|
||||
if not playlist:
|
||||
iframe = try_call(lambda: get_element_text_and_html_by_tag('iframe', webpage)[1]) or ''
|
||||
iframe_src = url_or_none(extract_attributes(iframe).get('src'))
|
||||
iframe_src = traverse_obj(webpage, (
|
||||
{find_element(tag='iframe', html=True)}, {extract_attributes}, 'src', {url_or_none}))
|
||||
if not iframe_src:
|
||||
raise ExtractorError('No playlist or embed found in webpage')
|
||||
elif re.match(r'https?://(?:\w+\.)?spotify\.com/', iframe_src):
|
||||
raise ExtractorError('Spotify embeds are not supported', expected=True)
|
||||
return self.url_result(url, 'Generic')
|
||||
|
||||
player_params = self._search_json(
|
||||
r'var srp_player_params_[\da-f]+\s*=', webpage, 'player params', playlist_id, default={})
|
||||
|
||||
entries = []
|
||||
for track_number, track in enumerate(re.findall(r'(<li[^>]+data-audiopath[^>]+>)', playlist), 1):
|
||||
entry = traverse_obj(extract_attributes(track), {
|
||||
@@ -200,12 +201,12 @@ class NekoHackerIE(InfoExtractor):
|
||||
'album': 'data-albumtitle',
|
||||
'duration': ('data-tracktime', {parse_duration}),
|
||||
'release_date': ('data-releasedate', {lambda x: re.match(r'\d{8}', x.replace('.', ''))}, 0),
|
||||
'thumbnail': ('data-albumart', {url_or_none}),
|
||||
})
|
||||
entries.append({
|
||||
**entry,
|
||||
'thumbnail': url_or_none(player_params.get('artwork')),
|
||||
'track_number': track_number,
|
||||
'artist': 'Neko Hacker',
|
||||
'artists': ['Neko Hacker'],
|
||||
'vcodec': 'none',
|
||||
'acodec': 'mp3' if entry['ext'] == 'mp3' else None,
|
||||
})
|
||||
|
||||
@@ -36,10 +36,6 @@ class NetEaseMusicBaseIE(InfoExtractor):
|
||||
_API_BASE = 'http://music.163.com/api/'
|
||||
_GEO_BYPASS = False
|
||||
|
||||
@staticmethod
|
||||
def _kilo_or_none(value):
|
||||
return int_or_none(value, scale=1000)
|
||||
|
||||
def _create_eapi_cipher(self, api_path, query_body, cookies):
|
||||
request_text = json.dumps({**query_body, 'header': cookies}, separators=(',', ':'))
|
||||
|
||||
@@ -101,7 +97,7 @@ class NetEaseMusicBaseIE(InfoExtractor):
|
||||
'vcodec': 'none',
|
||||
**traverse_obj(song, {
|
||||
'ext': ('type', {str}),
|
||||
'abr': ('br', {self._kilo_or_none}),
|
||||
'abr': ('br', {int_or_none(scale=1000)}),
|
||||
'filesize': ('size', {int_or_none}),
|
||||
}),
|
||||
})
|
||||
@@ -282,9 +278,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||
**lyric_data,
|
||||
**traverse_obj(info, {
|
||||
'title': ('name', {str}),
|
||||
'timestamp': ('album', 'publishTime', {self._kilo_or_none}),
|
||||
'timestamp': ('album', 'publishTime', {int_or_none(scale=1000)}),
|
||||
'thumbnail': ('album', 'picUrl', {url_or_none}),
|
||||
'duration': ('duration', {self._kilo_or_none}),
|
||||
'duration': ('duration', {int_or_none(scale=1000)}),
|
||||
'album': ('album', 'name', {str}),
|
||||
'average_rating': ('score', {int_or_none}),
|
||||
}),
|
||||
@@ -440,7 +436,7 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE):
|
||||
'tags': ('tags', ..., {str}),
|
||||
'uploader': ('creator', 'nickname', {str}),
|
||||
'uploader_id': ('creator', 'userId', {str_or_none}),
|
||||
'timestamp': ('updateTime', {self._kilo_or_none}),
|
||||
'timestamp': ('updateTime', {int_or_none(scale=1000)}),
|
||||
}))
|
||||
if traverse_obj(info, ('playlist', 'specialType')) == 10:
|
||||
metainfo['title'] = f'{metainfo.get("title")} {strftime_or_none(metainfo.get("timestamp"), "%Y-%m-%d")}'
|
||||
@@ -517,10 +513,10 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
|
||||
'creators': traverse_obj(info, ('artists', ..., 'name')) or [info.get('artistName')],
|
||||
**traverse_obj(info, {
|
||||
'title': ('name', {str}),
|
||||
'description': (('desc', 'briefDesc'), {str}, {lambda x: x or None}),
|
||||
'description': (('desc', 'briefDesc'), {str}, filter),
|
||||
'upload_date': ('publishTime', {unified_strdate}),
|
||||
'thumbnail': ('cover', {url_or_none}),
|
||||
'duration': ('duration', {self._kilo_or_none}),
|
||||
'duration': ('duration', {int_or_none(scale=1000)}),
|
||||
'view_count': ('playCount', {int_or_none}),
|
||||
'like_count': ('likeCount', {int_or_none}),
|
||||
'comment_count': ('commentCount', {int_or_none}),
|
||||
@@ -588,7 +584,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
|
||||
'description': ('description', {str}),
|
||||
'creator': ('dj', 'brand', {str}),
|
||||
'thumbnail': ('coverUrl', {url_or_none}),
|
||||
'timestamp': ('createTime', {self._kilo_or_none}),
|
||||
'timestamp': ('createTime', {int_or_none(scale=1000)}),
|
||||
})
|
||||
|
||||
if not self._yes_playlist(
|
||||
@@ -598,7 +594,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
|
||||
return {
|
||||
'id': str(info['mainSong']['id']),
|
||||
'formats': formats,
|
||||
'duration': traverse_obj(info, ('mainSong', 'duration', {self._kilo_or_none})),
|
||||
'duration': traverse_obj(info, ('mainSong', 'duration', {int_or_none(scale=1000)})),
|
||||
**metainfo,
|
||||
}
|
||||
|
||||
|
||||
@@ -11,9 +11,12 @@ from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
get_element_by_class,
|
||||
traverse_obj,
|
||||
int_or_none,
|
||||
make_archive_id,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class NFLBaseIE(InfoExtractor):
|
||||
@@ -75,22 +78,15 @@ class NFLBaseIE(InfoExtractor):
|
||||
'osVersion': '10.0',
|
||||
}, separators=(',', ':')).encode()).decode(),
|
||||
'networkType': 'other',
|
||||
'nflClaimGroupsToAdd': [],
|
||||
'nflClaimGroupsToRemove': [],
|
||||
'peacockUUID': 'undefined',
|
||||
}
|
||||
_ACCOUNT_INFO = {}
|
||||
_API_KEY = None
|
||||
_API_KEY = '3_Qa8TkWpIB8ESCBT8tY2TukbVKgO5F6BJVc7N1oComdwFzI7H2L9NOWdm11i_BY9f'
|
||||
|
||||
_TOKEN = None
|
||||
_TOKEN_EXPIRY = 0
|
||||
|
||||
def _get_account_info(self, url, slug):
|
||||
if not self._API_KEY:
|
||||
webpage = self._download_webpage(url, slug, fatal=False) or ''
|
||||
self._API_KEY = self._search_regex(
|
||||
r'window\.gigyaApiKey\s*=\s*["\'](\w+)["\'];', webpage, 'API key',
|
||||
fatal=False) or '3_Qa8TkWpIB8ESCBT8tY2TukbVKgO5F6BJVc7N1oComdwFzI7H2L9NOWdm11i_BY9f'
|
||||
|
||||
def _get_account_info(self):
|
||||
cookies = self._get_cookies('https://auth-id.nfl.com/')
|
||||
login_token = traverse_obj(cookies, (
|
||||
(f'glt_{self._API_KEY}', lambda k, _: k.startswith('glt_')), {lambda x: x.value}), get_all=False)
|
||||
@@ -103,7 +99,7 @@ class NFLBaseIE(InfoExtractor):
|
||||
'or else try using --cookies-from-browser instead', expected=True)
|
||||
|
||||
account = self._download_json(
|
||||
'https://auth-id.nfl.com/accounts.getAccountInfo', slug,
|
||||
'https://auth-id.nfl.com/accounts.getAccountInfo', None,
|
||||
note='Downloading account info', data=urlencode_postdata({
|
||||
'include': 'profile,data',
|
||||
'lang': 'en',
|
||||
@@ -111,7 +107,7 @@ class NFLBaseIE(InfoExtractor):
|
||||
'sdk': 'js_latest',
|
||||
'login_token': login_token,
|
||||
'authMode': 'cookie',
|
||||
'pageURL': url,
|
||||
'pageURL': 'https://www.nfl.com/',
|
||||
'sdkBuild': traverse_obj(cookies, (
|
||||
'gig_canary_ver', {lambda x: x.value.partition('-')[0]}), default='15170'),
|
||||
'format': 'json',
|
||||
@@ -126,55 +122,78 @@ class NFLBaseIE(InfoExtractor):
|
||||
if len(self._ACCOUNT_INFO) != 3:
|
||||
raise ExtractorError('Failed to retrieve account info with provided cookies', expected=True)
|
||||
|
||||
def _get_auth_token(self, url, slug):
|
||||
def _get_auth_token(self):
|
||||
if self._TOKEN and self._TOKEN_EXPIRY > int(time.time() + 30):
|
||||
return
|
||||
|
||||
if not self._ACCOUNT_INFO:
|
||||
self._get_account_info(url, slug)
|
||||
|
||||
token = self._download_json(
|
||||
'https://api.nfl.com/identity/v3/token%s' % (
|
||||
'/refresh' if self._ACCOUNT_INFO.get('refreshToken') else ''),
|
||||
slug, headers={'Content-Type': 'application/json'}, note='Downloading access token',
|
||||
None, headers={'Content-Type': 'application/json'}, note='Downloading access token',
|
||||
data=json.dumps({**self._CLIENT_DATA, **self._ACCOUNT_INFO}, separators=(',', ':')).encode())
|
||||
|
||||
self._TOKEN = token['accessToken']
|
||||
self._TOKEN_EXPIRY = token['expiresIn']
|
||||
self._ACCOUNT_INFO['refreshToken'] = token['refreshToken']
|
||||
|
||||
def _extract_video(self, mcp_id, is_live=False):
|
||||
self._get_auth_token()
|
||||
data = self._download_json(
|
||||
f'https://api.nfl.com/play/v1/asset/{mcp_id}', mcp_id, headers={
|
||||
'Authorization': f'Bearer {self._TOKEN}',
|
||||
'Accept': 'application/json',
|
||||
'Content-Type': 'application/json',
|
||||
}, data=json.dumps({'init': True, 'live': is_live}, separators=(',', ':')).encode())
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
data['accessUrl'], mcp_id, 'mp4', m3u8_id='hls')
|
||||
|
||||
return {
|
||||
'id': mcp_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'is_live': is_live,
|
||||
'_old_archive_ids': [make_archive_id(AnvatoIE, mcp_id)],
|
||||
**traverse_obj(data, ('metadata', {
|
||||
'title': ('event', ('def_title', 'friendlyName'), {str}, any),
|
||||
'description': ('event', 'def_description', {str}),
|
||||
'duration': ('event', 'duration', {int_or_none}),
|
||||
'thumbnails': ('thumbnails', ..., 'url', {'url': {url_or_none}}),
|
||||
})),
|
||||
}
|
||||
|
||||
def _parse_video_config(self, video_config, display_id):
|
||||
video_config = self._parse_json(video_config, display_id)
|
||||
is_live = traverse_obj(video_config, ('live', {bool})) or False
|
||||
item = video_config['playlist'][0]
|
||||
mcp_id = item.get('mcpID')
|
||||
if mcp_id:
|
||||
info = self.url_result(f'{self._ANVATO_PREFIX}{mcp_id}', AnvatoIE, mcp_id)
|
||||
if mcp_id := item.get('mcpID'):
|
||||
return self._extract_video(mcp_id, is_live=is_live)
|
||||
|
||||
info = {'id': item.get('id') or item['entityId']}
|
||||
|
||||
item_url = item['url']
|
||||
ext = determine_ext(item_url)
|
||||
if ext == 'm3u8':
|
||||
info['formats'] = self._extract_m3u8_formats(item_url, info['id'], 'mp4')
|
||||
else:
|
||||
media_id = item.get('id') or item['entityId']
|
||||
title = item.get('title')
|
||||
item_url = item['url']
|
||||
info = {'id': media_id}
|
||||
ext = determine_ext(item_url)
|
||||
if ext == 'm3u8':
|
||||
info['formats'] = self._extract_m3u8_formats(item_url, media_id, 'mp4')
|
||||
else:
|
||||
info['url'] = item_url
|
||||
if item.get('audio') is True:
|
||||
info['vcodec'] = 'none'
|
||||
is_live = video_config.get('live') is True
|
||||
thumbnails = None
|
||||
image_url = item.get(item.get('imageSrc')) or item.get(item.get('posterImage'))
|
||||
if image_url:
|
||||
thumbnails = [{
|
||||
'url': image_url,
|
||||
'ext': determine_ext(image_url, 'jpg'),
|
||||
}]
|
||||
info.update({
|
||||
'title': title,
|
||||
'is_live': is_live,
|
||||
'description': clean_html(item.get('description')),
|
||||
'thumbnails': thumbnails,
|
||||
})
|
||||
info['url'] = item_url
|
||||
if item.get('audio') is True:
|
||||
info['vcodec'] = 'none'
|
||||
|
||||
thumbnails = None
|
||||
if image_url := traverse_obj(item, 'imageSrc', 'posterImage', expected_type=url_or_none):
|
||||
thumbnails = [{
|
||||
'url': image_url,
|
||||
'ext': determine_ext(image_url, 'jpg'),
|
||||
}]
|
||||
|
||||
info.update({
|
||||
**traverse_obj(item, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {clean_html}),
|
||||
}),
|
||||
'is_live': is_live,
|
||||
'thumbnails': thumbnails,
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
@@ -188,24 +207,20 @@ class NFLIE(NFLBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': "Baker Mayfield's game-changing plays from 3-TD game Week 14",
|
||||
'description': 'md5:85e05a3cc163f8c344340f220521136d',
|
||||
'upload_date': '20201215',
|
||||
'timestamp': 1608009755,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'NFL',
|
||||
'tags': 'count:6',
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'duration': 157,
|
||||
'categories': 'count:3',
|
||||
'_old_archive_ids': ['anvato 899441'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.chiefs.com/listen/patrick-mahomes-travis-kelce-react-to-win-over-dolphins-the-breakdown',
|
||||
'md5': '6886b32c24b463038c760ceb55a34566',
|
||||
'md5': '92a517f05bd3eb50fe50244bc621aec8',
|
||||
'info_dict': {
|
||||
'id': 'd87e8790-3e14-11eb-8ceb-ff05c2867f99',
|
||||
'id': '8b7c3625-a461-4751-8db4-85f536f2bbd0',
|
||||
'ext': 'mp3',
|
||||
'title': 'Patrick Mahomes, Travis Kelce React to Win Over Dolphins | The Breakdown',
|
||||
'description': 'md5:12ada8ee70e6762658c30e223e095075',
|
||||
'thumbnail': 'https://static.clubs.nfl.com/image/private/t_editorial_landscape_12_desktop/v1571153441/chiefs/rfljejccnyhhkpkfq855',
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
}, {
|
||||
'url': 'https://www.buffalobills.com/video/buffalo-bills-military-recognition-week-14',
|
||||
'only_matching': True,
|
||||
@@ -236,13 +251,16 @@ class NFLArticleIE(NFLBaseIE):
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
entries = []
|
||||
for video_config in re.findall(self._VIDEO_CONFIG_REGEX, webpage):
|
||||
entries.append(self._parse_video_config(video_config, display_id))
|
||||
|
||||
def entries():
|
||||
for video_config in re.findall(self._VIDEO_CONFIG_REGEX, webpage):
|
||||
yield self._parse_video_config(video_config, display_id)
|
||||
|
||||
title = clean_html(get_element_by_class(
|
||||
'nfl-c-article__title', webpage)) or self._html_search_meta(
|
||||
['og:title', 'twitter:title'], webpage)
|
||||
return self.playlist_result(entries, display_id, title)
|
||||
|
||||
return self.playlist_result(entries(), display_id, title)
|
||||
|
||||
|
||||
class NFLPlusReplayIE(NFLBaseIE):
|
||||
@@ -307,6 +325,9 @@ class NFLPlusReplayIE(NFLBaseIE):
|
||||
'all_22': 'All-22',
|
||||
}
|
||||
|
||||
def _real_initialize(self):
|
||||
self._get_account_info()
|
||||
|
||||
def _real_extract(self, url):
|
||||
slug, video_id = self._match_valid_url(url).group('slug', 'id')
|
||||
requested_types = self._configuration_arg('type', ['all'])
|
||||
@@ -315,7 +336,7 @@ class NFLPlusReplayIE(NFLBaseIE):
|
||||
requested_types = traverse_obj(self._REPLAY_TYPES, (None, requested_types))
|
||||
|
||||
if not video_id:
|
||||
self._get_auth_token(url, slug)
|
||||
self._get_auth_token()
|
||||
headers = {'Authorization': f'Bearer {self._TOKEN}'}
|
||||
game_id = self._download_json(
|
||||
f'https://api.nfl.com/football/v2/games/externalId/slug/{slug}', slug,
|
||||
@@ -328,14 +349,13 @@ class NFLPlusReplayIE(NFLBaseIE):
|
||||
'items', lambda _, v: v['subType'] == requested_types[0], 'mcpPlaybackId'), get_all=False)
|
||||
|
||||
if video_id:
|
||||
return self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id)
|
||||
return self._extract_video(video_id)
|
||||
|
||||
def entries():
|
||||
for replay in traverse_obj(
|
||||
replays, ('items', lambda _, v: v['mcpPlaybackId'] and v['subType'] in requested_types),
|
||||
):
|
||||
video_id = replay['mcpPlaybackId']
|
||||
yield self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id)
|
||||
yield self._extract_video(replay['mcpPlaybackId'])
|
||||
|
||||
return self.playlist_result(entries(), slug)
|
||||
|
||||
@@ -362,12 +382,15 @@ class NFLPlusEpisodeIE(NFLBaseIE):
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
self._get_account_info()
|
||||
|
||||
def _real_extract(self, url):
|
||||
slug = self._match_id(url)
|
||||
self._get_auth_token(url, slug)
|
||||
self._get_auth_token()
|
||||
video_id = self._download_json(
|
||||
f'https://api.nfl.com/content/v1/videos/episodes/{slug}', slug, headers={
|
||||
'Authorization': f'Bearer {self._TOKEN}',
|
||||
})['mcpPlaybackId']
|
||||
|
||||
return self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id)
|
||||
return self._extract_video(video_id)
|
||||
|
||||
@@ -371,11 +371,11 @@ class NiconicoIE(InfoExtractor):
|
||||
'acodec': 'aac',
|
||||
'vcodec': 'h264',
|
||||
**traverse_obj(audio_quality, ('metadata', {
|
||||
'abr': ('bitrate', {functools.partial(float_or_none, scale=1000)}),
|
||||
'abr': ('bitrate', {float_or_none(scale=1000)}),
|
||||
'asr': ('samplingRate', {int_or_none}),
|
||||
})),
|
||||
**traverse_obj(video_quality, ('metadata', {
|
||||
'vbr': ('bitrate', {functools.partial(float_or_none, scale=1000)}),
|
||||
'vbr': ('bitrate', {float_or_none(scale=1000)}),
|
||||
'height': ('resolution', 'height', {int_or_none}),
|
||||
'width': ('resolution', 'width', {int_or_none}),
|
||||
})),
|
||||
@@ -428,7 +428,7 @@ class NiconicoIE(InfoExtractor):
|
||||
**audio_fmt,
|
||||
**traverse_obj(audios, (lambda _, v: audio_fmt['format_id'].startswith(v['id']), {
|
||||
'format_id': ('id', {str}),
|
||||
'abr': ('bitRate', {functools.partial(float_or_none, scale=1000)}),
|
||||
'abr': ('bitRate', {float_or_none(scale=1000)}),
|
||||
'asr': ('samplingRate', {int_or_none}),
|
||||
}), get_all=False),
|
||||
'acodec': 'aac',
|
||||
@@ -869,7 +869,7 @@ class NicovideoTagURLIE(NicovideoSearchBaseIE):
|
||||
|
||||
|
||||
class NiconicoUserIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)/?(?:$|[#?])'
|
||||
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)(?:/video)?/?(?:$|[#?])'
|
||||
_TEST = {
|
||||
'url': 'https://www.nicovideo.jp/user/419948',
|
||||
'info_dict': {
|
||||
@@ -877,7 +877,7 @@ class NiconicoUserIE(InfoExtractor):
|
||||
},
|
||||
'playlist_mincount': 101,
|
||||
}
|
||||
_API_URL = 'https://nvapi.nicovideo.jp/v1/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s'
|
||||
_API_URL = 'https://nvapi.nicovideo.jp/v2/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s'
|
||||
_PAGE_SIZE = 100
|
||||
|
||||
_API_HEADERS = {
|
||||
@@ -897,12 +897,13 @@ class NiconicoUserIE(InfoExtractor):
|
||||
total_count = int_or_none(json_parsed['data'].get('totalCount'))
|
||||
for entry in json_parsed['data']['items']:
|
||||
count += 1
|
||||
yield self.url_result('https://www.nicovideo.jp/watch/{}'.format(entry['id']))
|
||||
yield self.url_result(
|
||||
f'https://www.nicovideo.jp/watch/{entry["essential"]["id"]}', ie=NiconicoIE)
|
||||
page_num += 1
|
||||
|
||||
def _real_extract(self, url):
|
||||
list_id = self._match_id(url)
|
||||
return self.playlist_result(self._entries(list_id), list_id, ie=NiconicoIE.ie_key())
|
||||
return self.playlist_result(self._entries(list_id), list_id)
|
||||
|
||||
|
||||
class NiconicoLiveIE(InfoExtractor):
|
||||
|
||||
@@ -10,10 +10,10 @@ from ..utils import (
|
||||
get_element_html_by_class,
|
||||
get_elements_by_class,
|
||||
int_or_none,
|
||||
try_call,
|
||||
unified_timestamp,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import find_element, find_elements, traverse_obj
|
||||
|
||||
|
||||
class NubilesPornIE(InfoExtractor):
|
||||
@@ -70,9 +70,8 @@ class NubilesPornIE(InfoExtractor):
|
||||
url, get_element_by_class('watch-page-video-wrapper', page), video_id)[0]
|
||||
|
||||
channel_id, channel_name = self._search_regex(
|
||||
r'/video/website/(?P<id>\d+).+>(?P<name>\w+).com', get_element_html_by_class('site-link', page),
|
||||
r'/video/website/(?P<id>\d+).+>(?P<name>\w+).com', get_element_html_by_class('site-link', page) or '',
|
||||
'channel', fatal=False, group=('id', 'name')) or (None, None)
|
||||
channel_name = re.sub(r'([^A-Z]+)([A-Z]+)', r'\1 \2', channel_name)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -82,14 +81,14 @@ class NubilesPornIE(InfoExtractor):
|
||||
'thumbnail': media_entries.get('thumbnail'),
|
||||
'description': clean_html(get_element_html_by_class('content-pane-description', page)),
|
||||
'timestamp': unified_timestamp(get_element_by_class('date', page)),
|
||||
'channel': channel_name,
|
||||
'channel': re.sub(r'([^A-Z]+)([A-Z]+)', r'\1 \2', channel_name) if channel_name else None,
|
||||
'channel_id': channel_id,
|
||||
'channel_url': format_field(channel_id, None, 'https://members.nubiles-porn.com/video/website/%s'),
|
||||
'like_count': int_or_none(get_element_by_id('likecount', page)),
|
||||
'average_rating': float_or_none(get_element_by_class('score', page)),
|
||||
'age_limit': 18,
|
||||
'categories': try_call(lambda: list(map(clean_html, get_elements_by_class('btn', get_element_by_class('categories', page))))),
|
||||
'tags': try_call(lambda: list(map(clean_html, get_elements_by_class('btn', get_elements_by_class('tags', page)[1])))),
|
||||
'categories': traverse_obj(page, ({find_element(cls='categories')}, {find_elements(cls='btn')}, ..., {clean_html})),
|
||||
'tags': traverse_obj(page, ({find_elements(cls='tags')}, 1, {find_elements(cls='btn')}, ..., {clean_html})),
|
||||
'cast': get_elements_by_class('content-pane-performer', page),
|
||||
'availability': 'needs_auth',
|
||||
'series': channel_name,
|
||||
|
||||
@@ -235,7 +235,7 @@ class NYTimesArticleIE(NYTimesBaseIE):
|
||||
details = traverse_obj(block, {
|
||||
'id': ('sourceId', {str}),
|
||||
'uploader': ('bylines', ..., 'renderedRepresentation', {str}),
|
||||
'duration': (None, (('duration', {lambda x: float_or_none(x, scale=1000)}), ('length', {int_or_none}))),
|
||||
'duration': (None, (('duration', {float_or_none(scale=1000)}), ('length', {int_or_none}))),
|
||||
'timestamp': ('firstPublished', {parse_iso8601}),
|
||||
'series': ('podcastSeries', {str}),
|
||||
}, get_all=False)
|
||||
|
||||
@@ -115,7 +115,7 @@ class OnDemandKoreaIE(InfoExtractor):
|
||||
**traverse_obj(data, {
|
||||
'thumbnail': ('episode', 'images', 'thumbnail', {url_or_none}),
|
||||
'release_date': ('episode', 'release_date', {lambda x: x.replace('-', '')}, {unified_strdate}),
|
||||
'duration': ('duration', {functools.partial(float_or_none, scale=1000)}),
|
||||
'duration': ('duration', {float_or_none(scale=1000)}),
|
||||
'age_limit': ('age_rating', 'name', {lambda x: x.replace('R', '')}, {parse_age_limit}),
|
||||
'series': ('episode', {if_series(key='program')}, 'title'),
|
||||
'series_id': ('episode', {if_series(key='program')}, 'id', {str_or_none}),
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import base64
|
||||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -192,7 +191,7 @@ class ORFPodcastIE(InfoExtractor):
|
||||
'ext': ('enclosures', 0, 'type', {mimetype2ext}),
|
||||
'title': 'title',
|
||||
'description': ('description', {clean_html}),
|
||||
'duration': ('duration', {functools.partial(float_or_none, scale=1000)}),
|
||||
'duration': ('duration', {float_or_none(scale=1000)}),
|
||||
'series': ('podcast', 'title'),
|
||||
})),
|
||||
}
|
||||
@@ -494,7 +493,7 @@ class ORFONIE(InfoExtractor):
|
||||
return traverse_obj(api_json, {
|
||||
'id': ('id', {int}, {str_or_none}),
|
||||
'age_limit': ('age_classification', {parse_age_limit}),
|
||||
'duration': ('exact_duration', {functools.partial(float_or_none, scale=1000)}),
|
||||
'duration': ('exact_duration', {float_or_none(scale=1000)}),
|
||||
'title': (('title', 'headline'), {str}),
|
||||
'description': (('description', 'teaser_text'), {str}),
|
||||
'media_type': ('video_type', {str}),
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
@@ -83,7 +81,7 @@ class ParlerIE(InfoExtractor):
|
||||
'timestamp': ('date_created', {unified_timestamp}),
|
||||
'uploader': ('user', 'name', {strip_or_none}),
|
||||
'uploader_id': ('user', 'username', {str}),
|
||||
'uploader_url': ('user', 'username', {functools.partial(urljoin, 'https://parler.com/')}),
|
||||
'uploader_url': ('user', 'username', {urljoin('https://parler.com/')}),
|
||||
'view_count': ('views', {int_or_none}),
|
||||
'comment_count': ('total_comments', {int_or_none}),
|
||||
'repost_count': ('echos', {int_or_none}),
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import functools
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
@@ -104,7 +104,7 @@ class PornboxIE(InfoExtractor):
|
||||
get_quality = qualities(['web', 'vga', 'hd', '1080p', '4k', '8k'])
|
||||
metadata['formats'] = traverse_obj(stream_data, ('qualities', lambda _, v: v['src'], {
|
||||
'url': 'src',
|
||||
'vbr': ('bitrate', {functools.partial(int_or_none, scale=1000)}),
|
||||
'vbr': ('bitrate', {int_or_none(scale=1000)}),
|
||||
'format_id': ('quality', {str_or_none}),
|
||||
'quality': ('quality', {get_quality}),
|
||||
'width': ('size', {lambda x: int(x[:-1])}),
|
||||
|
||||
@@ -198,6 +198,6 @@ class Pr0grammIE(InfoExtractor):
|
||||
'dislike_count': ('down', {int}),
|
||||
'timestamp': ('created', {int}),
|
||||
'upload_date': ('created', {int}, {dt.date.fromtimestamp}, {lambda x: x.strftime('%Y%m%d')}),
|
||||
'thumbnail': ('thumb', {lambda x: urljoin('https://thumb.pr0gramm.com', x)}),
|
||||
'thumbnail': ('thumb', {urljoin('https://thumb.pr0gramm.com')}),
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -140,7 +140,7 @@ class QDanceIE(InfoExtractor):
|
||||
'description': ('description', {str.strip}),
|
||||
'display_id': ('slug', {str}),
|
||||
'thumbnail': ('thumbnail', {url_or_none}),
|
||||
'duration': ('durationInSeconds', {int_or_none}, {lambda x: x or None}),
|
||||
'duration': ('durationInSeconds', {int_or_none}, filter),
|
||||
'availability': ('subscription', 'level', {extract_availability}),
|
||||
'is_live': ('type', {lambda x: x.lower() == 'live'}),
|
||||
'artist': ('acts', ..., {str}),
|
||||
|
||||
@@ -211,10 +211,10 @@ class QQMusicIE(QQMusicBaseIE):
|
||||
'formats': formats,
|
||||
**traverse_obj(info_data, {
|
||||
'title': ('title', {str}),
|
||||
'album': ('album', 'title', {str}, {lambda x: x or None}),
|
||||
'album': ('album', 'title', {str}, filter),
|
||||
'release_date': ('time_public', {lambda x: x.replace('-', '') or None}),
|
||||
'creators': ('singer', ..., 'name', {str}),
|
||||
'alt_title': ('subtitle', {str}, {lambda x: x or None}),
|
||||
'alt_title': ('subtitle', {str}, filter),
|
||||
'duration': ('interval', {int_or_none}),
|
||||
}),
|
||||
**traverse_obj(init_data, ('detail', {
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
@@ -118,7 +117,7 @@ class RedCDNLivxIE(InfoExtractor):
|
||||
|
||||
time_scale = traverse_obj(ism_doc, ('@TimeScale', {int_or_none})) or 10000000
|
||||
duration = traverse_obj(
|
||||
ism_doc, ('@Duration', {functools.partial(float_or_none, scale=time_scale)})) or None
|
||||
ism_doc, ('@Duration', {float_or_none(scale=time_scale)})) or None
|
||||
|
||||
live_status = None
|
||||
if traverse_obj(ism_doc, '@IsLive') == 'TRUE':
|
||||
|
||||
@@ -187,4 +187,4 @@ class RTVSLOShowIE(InfoExtractor):
|
||||
return self.playlist_from_matches(
|
||||
re.findall(r'<a [^>]*\bhref="(/arhiv/[^"]+)"', webpage),
|
||||
playlist_id, self._html_extract_title(webpage),
|
||||
getter=lambda x: urljoin('https://365.rtvslo.si', x), ie=RTVSLOIE)
|
||||
getter=urljoin('https://365.rtvslo.si'), ie=RTVSLOIE)
|
||||
|
||||
@@ -56,13 +56,13 @@ class SnapchatSpotlightIE(InfoExtractor):
|
||||
**traverse_obj(video_data, ('videoMetadata', {
|
||||
'title': ('name', {str}),
|
||||
'description': ('description', {str}),
|
||||
'timestamp': ('uploadDateMs', {lambda x: float_or_none(x, 1000)}),
|
||||
'timestamp': ('uploadDateMs', {float_or_none(scale=1000)}),
|
||||
'view_count': ('viewCount', {int_or_none}, {lambda x: None if x == -1 else x}),
|
||||
'repost_count': ('shareCount', {int_or_none}),
|
||||
'url': ('contentUrl', {url_or_none}),
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
'duration': ('durationMs', {lambda x: float_or_none(x, 1000)}),
|
||||
'duration': ('durationMs', {float_or_none(scale=1000)}),
|
||||
'thumbnail': ('thumbnailUrl', {url_or_none}),
|
||||
'uploader': ('creator', 'personCreator', 'username', {str}),
|
||||
'uploader_url': ('creator', 'personCreator', 'url', {url_or_none}),
|
||||
|
||||
@@ -208,7 +208,6 @@ class SoundcloudBaseIE(InfoExtractor):
|
||||
|
||||
def _extract_info_dict(self, info, full_title=None, secret_token=None, extract_flat=False):
|
||||
track_id = str(info['id'])
|
||||
title = info['title']
|
||||
|
||||
format_urls = set()
|
||||
formats = []
|
||||
@@ -367,7 +366,7 @@ class SoundcloudBaseIE(InfoExtractor):
|
||||
'uploader_id': str_or_none(user.get('id')) or user.get('permalink'),
|
||||
'uploader_url': user.get('permalink_url'),
|
||||
'timestamp': unified_timestamp(info.get('created_at')),
|
||||
'title': title,
|
||||
'title': info.get('title'),
|
||||
'description': info.get('description'),
|
||||
'thumbnails': thumbnails,
|
||||
'duration': float_or_none(info.get('duration'), 1000),
|
||||
@@ -377,7 +376,8 @@ class SoundcloudBaseIE(InfoExtractor):
|
||||
'like_count': extract_count('favoritings') or extract_count('likes'),
|
||||
'comment_count': extract_count('comment'),
|
||||
'repost_count': extract_count('reposts'),
|
||||
'genres': traverse_obj(info, ('genre', {str}, {lambda x: x or None}, all)),
|
||||
'genres': traverse_obj(info, ('genre', {str}, filter, all, filter)),
|
||||
'artists': traverse_obj(info, ('publisher_metadata', 'artist', {str}, filter, all, filter)),
|
||||
'formats': formats if not extract_flat else None,
|
||||
}
|
||||
|
||||
@@ -429,7 +429,6 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
'repost_count': int,
|
||||
'thumbnail': 'https://i1.sndcdn.com/artworks-000031955188-rwb18x-original.jpg',
|
||||
'uploader_url': 'https://soundcloud.com/ethmusic',
|
||||
'genres': [],
|
||||
},
|
||||
},
|
||||
# geo-restricted
|
||||
@@ -453,6 +452,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
'uploader_url': 'https://soundcloud.com/the-concept-band',
|
||||
'thumbnail': 'https://i1.sndcdn.com/artworks-v8bFHhXm7Au6-0-original.jpg',
|
||||
'genres': ['Alternative'],
|
||||
'artists': ['The Royal Concept'],
|
||||
},
|
||||
},
|
||||
# private link
|
||||
@@ -525,6 +525,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
'repost_count': int,
|
||||
'view_count': int,
|
||||
'genres': ['Dance & EDM'],
|
||||
'artists': ['80M'],
|
||||
},
|
||||
},
|
||||
# private link, downloadable format
|
||||
@@ -549,6 +550,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
'thumbnail': 'https://i1.sndcdn.com/artworks-000240712245-kedn4p-original.jpg',
|
||||
'uploader_url': 'https://soundcloud.com/oriuplift',
|
||||
'genres': ['Trance'],
|
||||
'artists': ['Ori Uplift'],
|
||||
},
|
||||
},
|
||||
# no album art, use avatar pic for thumbnail
|
||||
@@ -572,7 +574,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
'uploader_url': 'https://soundcloud.com/garyvee',
|
||||
'genres': [],
|
||||
'artists': ['MadReal'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
|
||||
@@ -3,14 +3,12 @@ from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
get_element_text_and_html_by_tag,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
unified_timestamp,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class TBSJPEpisodeIE(InfoExtractor):
|
||||
@@ -64,7 +62,7 @@ class TBSJPEpisodeIE(InfoExtractor):
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
return {
|
||||
'title': try_call(lambda: clean_html(get_element_text_and_html_by_tag('h3', webpage)[0])),
|
||||
'title': traverse_obj(webpage, ({find_element(tag='h3')}, {clean_html})),
|
||||
'id': video_id,
|
||||
**traverse_obj(episode, {
|
||||
'categories': ('keywords', {list}),
|
||||
|
||||
@@ -136,7 +136,7 @@ class TeamcocoIE(TeamcocoBaseIE):
|
||||
'blocks', lambda _, v: v['name'] in ('meta-tags', 'video-player', 'video-info'), 'props', {dict})))
|
||||
|
||||
thumbnail = traverse_obj(
|
||||
info, (('image', 'poster'), {lambda x: urljoin('https://teamcoco.com/', x)}), get_all=False)
|
||||
info, (('image', 'poster'), {urljoin('https://teamcoco.com/')}), get_all=False)
|
||||
video_id = traverse_obj(parse_qs(thumbnail), ('id', 0)) or display_id
|
||||
|
||||
formats, subtitles = self._get_formats_and_subtitles(info, video_id)
|
||||
|
||||
@@ -10,10 +10,11 @@ from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
def _fmt_url(url):
|
||||
return functools.partial(format_field, template=url, default=None)
|
||||
return format_field(template=url, default=None)
|
||||
|
||||
|
||||
class TelewebionIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:www\.)?telewebion\.com/episode/(?P<id>(?:0x[a-fA-F\d]+|\d+))'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.telewebion.com/episode/0x1b3139c/',
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import functools
|
||||
import random
|
||||
import re
|
||||
import string
|
||||
@@ -278,7 +277,7 @@ class VQQSeriesIE(VQQBaseIE):
|
||||
webpage)]
|
||||
|
||||
return self.playlist_from_matches(
|
||||
episode_paths, series_id, ie=VQQVideoIE, getter=functools.partial(urljoin, url),
|
||||
episode_paths, series_id, ie=VQQVideoIE, getter=urljoin(url),
|
||||
title=self._get_clean_title(traverse_obj(webpage_metadata, ('coverInfo', 'title'))
|
||||
or self._og_search_title(webpage)),
|
||||
description=(traverse_obj(webpage_metadata, ('coverInfo', 'description'))
|
||||
@@ -328,7 +327,7 @@ class WeTvBaseIE(TencentBaseIE):
|
||||
or re.findall(r'<a[^>]+class="play-video__link"[^>]+href="(?P<path>[^"]+)', webpage))
|
||||
|
||||
return self.playlist_from_matches(
|
||||
episode_paths, series_id, ie=ie, getter=functools.partial(urljoin, url),
|
||||
episode_paths, series_id, ie=ie, getter=urljoin(url),
|
||||
title=self._get_clean_title(traverse_obj(webpage_metadata, ('coverInfo', 'title'))
|
||||
or self._og_search_title(webpage)),
|
||||
description=(traverse_obj(webpage_metadata, ('coverInfo', 'description'))
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import functools
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -161,4 +160,4 @@ class TenPlaySeasonIE(InfoExtractor):
|
||||
return self.playlist_from_matches(
|
||||
self._entries(urljoin(url, episodes_carousel['loadMoreUrl']), playlist_id),
|
||||
playlist_id, traverse_obj(season_info, ('content', 0, 'title', {str})),
|
||||
getter=functools.partial(urljoin, url))
|
||||
getter=urljoin(url))
|
||||
|
||||
@@ -131,4 +131,4 @@ class TheGuardianPodcastPlaylistIE(InfoExtractor):
|
||||
|
||||
return self.playlist_from_matches(
|
||||
self._entries(url, podcast_id), podcast_id, title, description=description,
|
||||
ie=TheGuardianPodcastIE, getter=lambda x: urljoin('https://www.theguardian.com', x))
|
||||
ie=TheGuardianPodcastIE, getter=urljoin('https://www.theguardian.com'))
|
||||
|
||||
@@ -469,7 +469,7 @@ class TikTokBaseIE(InfoExtractor):
|
||||
aweme_detail, aweme_id, traverse_obj(author_info, 'uploader', 'uploader_id', 'channel_id')),
|
||||
'thumbnails': thumbnails,
|
||||
'duration': (traverse_obj(video_info, (
|
||||
(None, 'download_addr'), 'duration', {functools.partial(int_or_none, scale=1000)}, any))
|
||||
(None, 'download_addr'), 'duration', {int_or_none(scale=1000)}, any))
|
||||
or traverse_obj(music_info, ('duration', {int_or_none}))),
|
||||
'availability': self._availability(
|
||||
is_private='Private' in labels,
|
||||
@@ -583,7 +583,7 @@ class TikTokBaseIE(InfoExtractor):
|
||||
author_info, ['uploader', 'uploader_id'], self._UPLOADER_URL_FORMAT, default=None),
|
||||
**traverse_obj(aweme_detail, ('music', {
|
||||
'track': ('title', {str}),
|
||||
'album': ('album', {str}, {lambda x: x or None}),
|
||||
'album': ('album', {str}, filter),
|
||||
'artists': ('authorName', {str}, {lambda x: re.split(r'(?:, | & )', x) if x else None}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
})),
|
||||
@@ -591,7 +591,7 @@ class TikTokBaseIE(InfoExtractor):
|
||||
'title': ('desc', {str}),
|
||||
'description': ('desc', {str}),
|
||||
# audio-only slideshows have a video duration of 0 and an actual audio duration
|
||||
'duration': ('video', 'duration', {int_or_none}, {lambda x: x or None}),
|
||||
'duration': ('video', 'duration', {int_or_none}, filter),
|
||||
'timestamp': ('createTime', {int_or_none}),
|
||||
}),
|
||||
**traverse_obj(aweme_detail, ('stats', {
|
||||
@@ -1493,7 +1493,7 @@ class TikTokLiveIE(TikTokBaseIE):
|
||||
|
||||
sdk_params = traverse_obj(stream, ('main', 'sdk_params', {parse_inner}, {
|
||||
'vcodec': ('VCodec', {str}),
|
||||
'tbr': ('vbitrate', {lambda x: int_or_none(x, 1000)}),
|
||||
'tbr': ('vbitrate', {int_or_none(scale=1000)}),
|
||||
'resolution': ('resolution', {lambda x: re.match(r'(?i)\d+x\d+|\d+p', x).group().lower()}),
|
||||
}))
|
||||
|
||||
|
||||
@@ -3,12 +3,13 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class TumblrIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?P<blog_name>[^/?#&]+)\.tumblr\.com/(?:post|video)/(?P<id>[0-9]+)(?:$|[/?#])'
|
||||
_VALID_URL = r'https?://(?P<blog_name_1>[^/?#&]+)\.tumblr\.com/(?:post|video|(?P<blog_name_2>[a-zA-Z\d-]+))/(?P<id>[0-9]+)(?:$|[/?#])'
|
||||
_NETRC_MACHINE = 'tumblr'
|
||||
_LOGIN_URL = 'https://www.tumblr.com/login'
|
||||
_OAUTH_URL = 'https://www.tumblr.com/api/v2/oauth2/token'
|
||||
@@ -66,6 +67,7 @@ class TumblrIE(InfoExtractor):
|
||||
'age_limit': 0,
|
||||
'tags': [],
|
||||
},
|
||||
'skip': '404',
|
||||
}, {
|
||||
'note': 'dashboard only (original post)',
|
||||
'url': 'https://jujanon.tumblr.com/post/159704441298/my-baby-eating',
|
||||
@@ -98,7 +100,6 @@ class TumblrIE(InfoExtractor):
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'age_limit': 0,
|
||||
'tags': [],
|
||||
},
|
||||
}, {
|
||||
'note': 'dashboard only (external)',
|
||||
@@ -109,14 +110,13 @@ class TumblrIE(InfoExtractor):
|
||||
'title': 'The Blues Remembers Everything the Country Forgot',
|
||||
'alt_title': 'The Blues Remembers Everything the Country Forgot',
|
||||
'description': 'md5:1a6b4097e451216835a24c1023707c79',
|
||||
'release_date': '20201224',
|
||||
'creator': 'md5:c2239ba15430e87c3b971ba450773272',
|
||||
'uploader': 'Moor Mother - Topic',
|
||||
'upload_date': '20201223',
|
||||
'uploader_id': 'UCxrMtFBRkFvQJ_vVM4il08w',
|
||||
'uploader_url': 'http://www.youtube.com/channel/UCxrMtFBRkFvQJ_vVM4il08w',
|
||||
'thumbnail': r're:^https?://i.ytimg.com/.*',
|
||||
'channel': 'Moor Mother - Topic',
|
||||
'channel': 'Moor Mother',
|
||||
'channel_id': 'UCxrMtFBRkFvQJ_vVM4il08w',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCxrMtFBRkFvQJ_vVM4il08w',
|
||||
'channel_follower_count': int,
|
||||
@@ -135,24 +135,10 @@ class TumblrIE(InfoExtractor):
|
||||
'release_year': 2020,
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
'url': 'http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching',
|
||||
'md5': 'de07e5211d60d4f3a2c3df757ea9f6ab',
|
||||
'info_dict': {
|
||||
'id': 'Wmur',
|
||||
'ext': 'mp4',
|
||||
'title': 'naked smoking & stretching',
|
||||
'upload_date': '20150506',
|
||||
'timestamp': 1430931613,
|
||||
'age_limit': 18,
|
||||
'uploader_id': '1638622',
|
||||
'uploader': 'naked-yogi',
|
||||
},
|
||||
# 'add_ie': ['Vidme'],
|
||||
'skip': 'dead embedded video host',
|
||||
'skip': 'Video Unavailable',
|
||||
}, {
|
||||
'url': 'https://prozdvoices.tumblr.com/post/673201091169681408/what-recording-voice-acting-sounds-like',
|
||||
'md5': 'a0063fc8110e6c9afe44065b4ea68177',
|
||||
'md5': 'cb8328a6723c30556cef59e370202918',
|
||||
'info_dict': {
|
||||
'id': 'eomhW5MLGWA',
|
||||
'ext': 'mp4',
|
||||
@@ -160,8 +146,8 @@ class TumblrIE(InfoExtractor):
|
||||
'description': 'md5:1da3faa22d0e0b1d8b50216c284ee798',
|
||||
'uploader': 'ProZD',
|
||||
'upload_date': '20220112',
|
||||
'uploader_id': 'ProZD',
|
||||
'uploader_url': 'http://www.youtube.com/user/ProZD',
|
||||
'uploader_id': '@ProZD',
|
||||
'uploader_url': 'https://www.youtube.com/@ProZD',
|
||||
'thumbnail': r're:^https?://i.ytimg.com/.*',
|
||||
'channel': 'ProZD',
|
||||
'channel_id': 'UC6MFZAOHXlKK1FI7V0XQVeA',
|
||||
@@ -176,6 +162,10 @@ class TumblrIE(InfoExtractor):
|
||||
'live_status': 'not_live',
|
||||
'playable_in_embed': True,
|
||||
'availability': 'public',
|
||||
'heatmap': 'count:100',
|
||||
'channel_is_verified': True,
|
||||
'timestamp': 1642014562,
|
||||
'comment_count': int,
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
@@ -183,16 +173,20 @@ class TumblrIE(InfoExtractor):
|
||||
'md5': '203e9eb8077e3f45bfaeb4c86c1467b8',
|
||||
'info_dict': {
|
||||
'id': '87816359',
|
||||
'ext': 'mov',
|
||||
'ext': 'mp4',
|
||||
'title': 'Harold Ramis',
|
||||
'description': 'md5:be8e68cbf56ce0785c77f0c6c6dfaf2c',
|
||||
'description': 'md5:c99882405fcca0b1d348ad093f8f1672',
|
||||
'uploader': 'Resolution Productions Group',
|
||||
'uploader_id': 'resolutionproductions',
|
||||
'uploader_url': 'https://vimeo.com/resolutionproductions',
|
||||
'upload_date': '20140227',
|
||||
'thumbnail': r're:^https?://i.vimeocdn.com/video/.*',
|
||||
'timestamp': 1393523719,
|
||||
'timestamp': 1393541719,
|
||||
'duration': 291,
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'release_timestamp': 1393541719,
|
||||
'release_date': '20140227',
|
||||
},
|
||||
'add_ie': ['Vimeo'],
|
||||
}, {
|
||||
@@ -214,6 +208,7 @@ class TumblrIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
},
|
||||
'add_ie': ['Vine'],
|
||||
'skip': 'Vine is unavailable',
|
||||
}, {
|
||||
'url': 'https://silami.tumblr.com/post/84250043974/my-bad-river-flows-in-you-impression-on-maschine',
|
||||
'md5': '3c92d7c3d867f14ccbeefa2119022277',
|
||||
@@ -232,6 +227,140 @@ class TumblrIE(InfoExtractor):
|
||||
'upload_date': '20140429',
|
||||
},
|
||||
'add_ie': ['Instagram'],
|
||||
}, {
|
||||
'note': 'new url scheme',
|
||||
'url': 'https://www.tumblr.com/autumnsister/765162750456578048?source=share',
|
||||
'info_dict': {
|
||||
'id': '765162750456578048',
|
||||
'ext': 'mp4',
|
||||
'uploader_url': 'https://autumnsister.tumblr.com/',
|
||||
'tags': ['autumn', 'food', 'curators on tumblr'],
|
||||
'like_count': int,
|
||||
'thumbnail': 'https://64.media.tumblr.com/tumblr_sklad89N3x1ygquow_frame1.jpg',
|
||||
'title': '🪹',
|
||||
'uploader_id': 'autumnsister',
|
||||
'repost_count': int,
|
||||
'age_limit': 0,
|
||||
},
|
||||
}, {
|
||||
'note': 'bandcamp album embed',
|
||||
'url': 'https://patricia-taxxon.tumblr.com/post/704473755725004800/patricia-taxxon-agnes-hilda-patricia-taxxon',
|
||||
'info_dict': {
|
||||
'id': 'agnes-hilda',
|
||||
'title': 'Agnes & Hilda',
|
||||
'description': 'The inexplicable joy of an artist. Wash paws after listening.',
|
||||
'uploader_id': 'patriciataxxon',
|
||||
},
|
||||
'playlist_count': 8,
|
||||
}, {
|
||||
'note': 'bandcamp track embeds (many)',
|
||||
'url': 'https://www.tumblr.com/felixcosm/730460905855467520/if-youre-looking-for-new-music-to-write-or',
|
||||
'info_dict': {
|
||||
'id': '730460905855467520',
|
||||
'uploader_id': 'felixcosm',
|
||||
'repost_count': int,
|
||||
'tags': 'count:15',
|
||||
'description': 'md5:2eb3482a3c6987280cbefb6839068f32',
|
||||
'like_count': int,
|
||||
'age_limit': 0,
|
||||
'title': 'If you\'re looking for new music to write or imagine scenerios to: STOP. This is for you.',
|
||||
'uploader_url': 'https://felixcosm.tumblr.com/',
|
||||
},
|
||||
'playlist_count': 10,
|
||||
}, {
|
||||
'note': 'soundcloud track embed',
|
||||
'url': 'https://silverfoxstole.tumblr.com/post/765305403763556352/jamie-robertson-doctor-who-8th-doctor',
|
||||
'info_dict': {
|
||||
'id': '1218136399',
|
||||
'ext': 'opus',
|
||||
'comment_count': int,
|
||||
'genres': [],
|
||||
'repost_count': int,
|
||||
'uploader': 'Jamie Robertson',
|
||||
'title': 'Doctor Who - 8th doctor - Stranded Theme never released and used.',
|
||||
'duration': 46.106,
|
||||
'uploader_id': '2731064',
|
||||
'thumbnail': 'https://i1.sndcdn.com/artworks-MVgcPm5jN42isC5M-6Dz22w-original.jpg',
|
||||
'timestamp': 1645181261,
|
||||
'uploader_url': 'https://soundcloud.com/jamierobertson',
|
||||
'view_count': int,
|
||||
'upload_date': '20220218',
|
||||
'description': 'md5:ab924dd9994d0a7d64d6d31bf2af4625',
|
||||
'license': 'all-rights-reserved',
|
||||
'like_count': int,
|
||||
},
|
||||
}, {
|
||||
'note': 'soundcloud set embed',
|
||||
'url': 'https://www.tumblr.com/beyourselfchulanmaria/703505323122638848/chu-lan-maria-the-playlist-%E5%BF%83%E7%9A%84%E5%91%BC%E5%96%9A-call-of-the',
|
||||
'info_dict': {
|
||||
'id': '691222680',
|
||||
'title': '心的呼喚 Call of the heart I',
|
||||
'description': 'md5:25952a8d178a3aa55e40fcbb646a38c3',
|
||||
},
|
||||
'playlist_mincount': 19,
|
||||
}, {
|
||||
'note': 'dailymotion video embed',
|
||||
'url': 'https://www.tumblr.com/funvibecentral/759390024460632064',
|
||||
'info_dict': {
|
||||
'id': 'x94cnnk',
|
||||
'ext': 'mp4',
|
||||
'description': 'Funny dailymotion shorts.\n#funny #fun#comedy #romantic #exciting',
|
||||
'uploader': 'FunVibe Central',
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'timestamp': 1724210553,
|
||||
'title': 'Woman watching other Woman',
|
||||
'tags': [],
|
||||
'upload_date': '20240821',
|
||||
'age_limit': 0,
|
||||
'uploader_id': 'x32m6ye',
|
||||
'duration': 20,
|
||||
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/Wtqh01cnxKNXLG1N8/x1080',
|
||||
},
|
||||
}, {
|
||||
'note': 'tiktok video embed',
|
||||
'url': 'https://fansofcolor.tumblr.com/post/660637918605475840/blockquote-class-tiktok-embed',
|
||||
'info_dict': {
|
||||
'id': '7000937272010935558',
|
||||
'ext': 'mp4',
|
||||
'artists': ['Alicia Dreaming'],
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
|
||||
'channel_id': 'MS4wLjABAAAAsJohwz_dU4KfAOc61cbGDAZ46-5hg2ANTXVQlRe1ipDhpX08PywR3PPiple1NTAo',
|
||||
'uploader': 'aliciadreaming',
|
||||
'description': 'huge casting news Greyworm will be #louisdulac #racebending #interviewwiththevampire',
|
||||
'title': 'huge casting news Greyworm will be #louisdulac #racebending #interviewwiththevampire',
|
||||
'channel_url': 'https://www.tiktok.com/@MS4wLjABAAAAsJohwz_dU4KfAOc61cbGDAZ46-5hg2ANTXVQlRe1ipDhpX08PywR3PPiple1NTAo',
|
||||
'uploader_id': '7000478462196990982',
|
||||
'uploader_url': 'https://www.tiktok.com/@aliciadreaming',
|
||||
'timestamp': 1630032733,
|
||||
'channel': 'Alicia Dreaming',
|
||||
'track': 'original sound',
|
||||
'upload_date': '20210827',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'duration': 59,
|
||||
},
|
||||
}, {
|
||||
'note': 'tumblr video AND youtube embed',
|
||||
'url': 'https://www.tumblr.com/anyaboz/765332564457209856/my-music-video-for-selkie-by-nobodys-wolf-child',
|
||||
'info_dict': {
|
||||
'id': '765332564457209856',
|
||||
'uploader_id': 'anyaboz',
|
||||
'repost_count': int,
|
||||
'age_limit': 0,
|
||||
'uploader_url': 'https://anyaboz.tumblr.com/',
|
||||
'description': 'md5:9a129cf6ce9d87a80ffd3c6dedd4d1e6',
|
||||
'like_count': int,
|
||||
'title': 'md5:b18a2ac9387681d20303e485db85c1b5',
|
||||
'tags': ['music video', 'nobodys wolf child', 'selkie', 'Stop Motion Animation', 'stop Motion', 'room guardians', 'Youtube'],
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
# twitch_live provider - error when linked account is not live
|
||||
'url': 'https://www.tumblr.com/anarcho-skamunist/722224493650722816/hollow-knight-stream-right-now-going-to-fight',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_providers = {
|
||||
@@ -239,6 +368,16 @@ class TumblrIE(InfoExtractor):
|
||||
'vimeo': 'Vimeo',
|
||||
'vine': 'Vine',
|
||||
'youtube': 'Youtube',
|
||||
'dailymotion': 'Dailymotion',
|
||||
'tiktok': 'TikTok',
|
||||
'twitch_live': 'TwitchStream',
|
||||
'bandcamp': None,
|
||||
'soundcloud': None,
|
||||
}
|
||||
# known not to be supported
|
||||
_unsupported_providers = {
|
||||
# seems like podcasts can't be embedded
|
||||
'spotify',
|
||||
}
|
||||
|
||||
_ACCESS_TOKEN = None
|
||||
@@ -256,23 +395,40 @@ class TumblrIE(InfoExtractor):
|
||||
if not self._ACCESS_TOKEN:
|
||||
return
|
||||
|
||||
self._download_json(
|
||||
self._OAUTH_URL, None, 'Logging in',
|
||||
data=urlencode_postdata({
|
||||
'password': password,
|
||||
'grant_type': 'password',
|
||||
'username': username,
|
||||
}), headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
'Authorization': f'Bearer {self._ACCESS_TOKEN}',
|
||||
},
|
||||
errnote='Login failed', fatal=False)
|
||||
data = {
|
||||
'password': password,
|
||||
'grant_type': 'password',
|
||||
'username': username,
|
||||
}
|
||||
if self.get_param('twofactor'):
|
||||
data['tfa_token'] = self.get_param('twofactor')
|
||||
|
||||
def _call_login():
|
||||
return self._download_json(
|
||||
self._OAUTH_URL, None, 'Logging in',
|
||||
data=urlencode_postdata(data),
|
||||
headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
'Authorization': f'Bearer {self._ACCESS_TOKEN}',
|
||||
},
|
||||
errnote='Login failed', fatal=False,
|
||||
expected_status=lambda s: 400 <= s < 500)
|
||||
|
||||
response = _call_login()
|
||||
if traverse_obj(response, 'error') == 'tfa_required':
|
||||
data['tfa_token'] = self._get_tfa_info()
|
||||
response = _call_login()
|
||||
if traverse_obj(response, 'error'):
|
||||
raise ExtractorError(
|
||||
f'API returned error {": ".join(traverse_obj(response, (("error", "error_description"), {str})))}')
|
||||
|
||||
def _real_extract(self, url):
|
||||
blog, video_id = self._match_valid_url(url).groups()
|
||||
blog_1, blog_2, video_id = self._match_valid_url(url).groups()
|
||||
blog = blog_2 or blog_1
|
||||
|
||||
url = f'http://{blog}.tumblr.com/post/{video_id}/'
|
||||
webpage, urlh = self._download_webpage_handle(url, video_id)
|
||||
url = f'http://{blog}.tumblr.com/post/{video_id}'
|
||||
webpage, urlh = self._download_webpage_handle(
|
||||
url, video_id, headers={'User-Agent': 'WhatsApp/2.0'}) # whatsapp ua bypasses problems
|
||||
|
||||
redirect_url = urlh.url
|
||||
|
||||
@@ -289,23 +445,69 @@ class TumblrIE(InfoExtractor):
|
||||
self._download_json(
|
||||
f'https://www.tumblr.com/api/v2/blog/{blog}/posts/{video_id}/permalink',
|
||||
video_id, headers={'Authorization': f'Bearer {self._ACCESS_TOKEN}'}, fatal=False),
|
||||
('response', 'timeline', 'elements', 0)) or {}
|
||||
content_json = traverse_obj(post_json, ('trail', 0, 'content'), ('content')) or []
|
||||
video_json = next(
|
||||
(item for item in content_json if item.get('type') == 'video'), {})
|
||||
media_json = video_json.get('media') or {}
|
||||
if api_only and not media_json.get('url') and not video_json.get('url'):
|
||||
raise ExtractorError('Failed to find video data for dashboard-only post')
|
||||
('response', 'timeline', 'elements', 0, {dict})) or {}
|
||||
content_json = traverse_obj(post_json, ((('trail', 0), None), 'content', ..., {dict}))
|
||||
|
||||
if not media_json.get('url') and video_json.get('url'):
|
||||
# external video host
|
||||
return self.url_result(
|
||||
video_json['url'],
|
||||
self._providers.get(video_json.get('provider'), 'Generic'))
|
||||
# the url we're extracting from might be an original post or it might be a reblog.
|
||||
# if it's a reblog, og:description will be the reblogger's comment, not the uploader's.
|
||||
# content_json is always the op, so if it exists but has no text, there's no description
|
||||
if content_json:
|
||||
description = '\n\n'.join(
|
||||
item.get('text') for item in content_json if item.get('type') == 'text') or None
|
||||
else:
|
||||
description = self._og_search_description(webpage, default=None)
|
||||
uploader_id = traverse_obj(post_json, 'reblogged_root_name', 'blog_name')
|
||||
|
||||
video_url = self._og_search_video_url(webpage, default=None)
|
||||
duration = None
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
'title': post_json.get('summary') or (blog if api_only else self._html_search_regex(
|
||||
r'(?s)<title>(?P<title>.*?)(?: \| Tumblr)?</title>', webpage, 'title', default=blog)),
|
||||
'description': description,
|
||||
'uploader_id': uploader_id,
|
||||
'uploader_url': f'https://{uploader_id}.tumblr.com/' if uploader_id else None,
|
||||
**traverse_obj(post_json, {
|
||||
'like_count': ('like_count', {int_or_none}),
|
||||
'repost_count': ('reblog_count', {int_or_none}),
|
||||
'tags': ('tags', ..., {str}),
|
||||
}),
|
||||
'age_limit': {True: 18, False: 0}.get(post_json.get('is_nsfw')),
|
||||
}
|
||||
|
||||
# for tumblr's own video hosting
|
||||
fallback_format = None
|
||||
formats = []
|
||||
video_url = self._og_search_video_url(webpage, default=None)
|
||||
# for external video hosts
|
||||
entries = []
|
||||
ignored_providers = set()
|
||||
unknown_providers = set()
|
||||
|
||||
for video_json in traverse_obj(content_json, lambda _, v: v['type'] in ('video', 'audio')):
|
||||
media_json = video_json.get('media') or {}
|
||||
if api_only and not media_json.get('url') and not video_json.get('url'):
|
||||
raise ExtractorError('Failed to find video data for dashboard-only post')
|
||||
provider = video_json.get('provider')
|
||||
|
||||
if provider in ('tumblr', None):
|
||||
fallback_format = {
|
||||
'url': media_json.get('url') or video_url,
|
||||
'width': int_or_none(
|
||||
media_json.get('width') or self._og_search_property('video:width', webpage, default=None)),
|
||||
'height': int_or_none(
|
||||
media_json.get('height') or self._og_search_property('video:height', webpage, default=None)),
|
||||
}
|
||||
continue
|
||||
elif provider in self._unsupported_providers:
|
||||
ignored_providers.add(provider)
|
||||
continue
|
||||
elif provider and provider not in self._providers:
|
||||
unknown_providers.add(provider)
|
||||
if video_json.get('url'):
|
||||
# external video host
|
||||
entries.append(self.url_result(
|
||||
video_json['url'], self._providers.get(provider)))
|
||||
|
||||
duration = None
|
||||
|
||||
# iframes can supply duration and sometimes additional formats, so check for one
|
||||
iframe_url = self._search_regex(
|
||||
@@ -344,44 +546,36 @@ class TumblrIE(InfoExtractor):
|
||||
'quality': quality,
|
||||
} for quality, (video_url, format_id) in enumerate(sources)]
|
||||
|
||||
if not media_json.get('url') and not video_url and not iframe_url:
|
||||
# external video host (but we weren't able to figure it out from the api)
|
||||
iframe_url = self._search_regex(
|
||||
r'src=["\'](https?://safe\.txmblr\.com/svc/embed/inline/[^"\']+)["\']',
|
||||
webpage, 'embed iframe url', default=None)
|
||||
return self.url_result(iframe_url or redirect_url, 'Generic')
|
||||
if not formats and fallback_format:
|
||||
formats.append(fallback_format)
|
||||
|
||||
formats = formats or [{
|
||||
'url': media_json.get('url') or video_url,
|
||||
'width': int_or_none(
|
||||
media_json.get('width') or self._og_search_property('video:width', webpage, default=None)),
|
||||
'height': int_or_none(
|
||||
media_json.get('height') or self._og_search_property('video:height', webpage, default=None)),
|
||||
}]
|
||||
if formats:
|
||||
# tumblr's own video is always above embeds
|
||||
entries.insert(0, {
|
||||
**info_dict,
|
||||
'formats': formats,
|
||||
'duration': duration,
|
||||
'thumbnail': (traverse_obj(video_json, ('poster', 0, 'url', {url_or_none}))
|
||||
or self._og_search_thumbnail(webpage, default=None)),
|
||||
})
|
||||
|
||||
# the url we're extracting from might be an original post or it might be a reblog.
|
||||
# if it's a reblog, og:description will be the reblogger's comment, not the uploader's.
|
||||
# content_json is always the op, so if it exists but has no text, there's no description
|
||||
if content_json:
|
||||
description = '\n\n'.join(
|
||||
item.get('text') for item in content_json if item.get('type') == 'text') or None
|
||||
else:
|
||||
description = self._og_search_description(webpage, default=None)
|
||||
uploader_id = traverse_obj(post_json, 'reblogged_root_name', 'blog_name')
|
||||
if ignored_providers:
|
||||
if not entries:
|
||||
raise ExtractorError(f'None of embed providers are supported: {", ".join(ignored_providers)!s}', video_id=video_id, expected=True)
|
||||
else:
|
||||
self.report_warning(f'Skipped embeds from unsupported providers: {", ".join(ignored_providers)!s}', video_id)
|
||||
if unknown_providers:
|
||||
self.report_warning(f'Unrecognized providers, please report: {", ".join(unknown_providers)!s}', video_id)
|
||||
|
||||
if not entries:
|
||||
self.raise_no_formats('No video could be found in this post', expected=True, video_id=video_id)
|
||||
if len(entries) == 1:
|
||||
return {
|
||||
**info_dict,
|
||||
**entries[0],
|
||||
}
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': post_json.get('summary') or (blog if api_only else self._html_search_regex(
|
||||
r'(?s)<title>(?P<title>.*?)(?: \| Tumblr)?</title>', webpage, 'title')),
|
||||
'description': description,
|
||||
'thumbnail': (traverse_obj(video_json, ('poster', 0, 'url'))
|
||||
or self._og_search_thumbnail(webpage, default=None)),
|
||||
'uploader_id': uploader_id,
|
||||
'uploader_url': f'https://{uploader_id}.tumblr.com/' if uploader_id else None,
|
||||
'duration': duration,
|
||||
'like_count': post_json.get('like_count'),
|
||||
'repost_count': post_json.get('reblog_count'),
|
||||
'age_limit': {True: 18, False: 0}.get(post_json.get('is_nsfw')),
|
||||
'tags': post_json.get('tags'),
|
||||
'formats': formats,
|
||||
**info_dict,
|
||||
'_type': 'playlist',
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import functools
|
||||
import re
|
||||
|
||||
from .brightcove import BrightcoveNewIE
|
||||
@@ -68,7 +67,7 @@ class TVAIE(InfoExtractor):
|
||||
'episode': episode,
|
||||
**traverse_obj(entity, {
|
||||
'description': ('longDescription', {str}),
|
||||
'duration': ('durationMillis', {functools.partial(float_or_none, scale=1000)}),
|
||||
'duration': ('durationMillis', {float_or_none(scale=1000)}),
|
||||
'channel': ('knownEntities', 'channel', 'name', {str}),
|
||||
'series': ('knownEntities', 'videoShow', 'name', {str}),
|
||||
'season_number': ('slug', {lambda x: re.search(r'/s(?:ai|ea)son-(\d+)/', x)}, 1, {int_or_none}),
|
||||
|
||||
@@ -150,14 +150,6 @@ class TwitterBaseIE(InfoExtractor):
|
||||
def is_logged_in(self):
|
||||
return bool(self._get_cookies(self._API_BASE).get('auth_token'))
|
||||
|
||||
# XXX: Temporary workaround until twitter.com => x.com migration is completed
|
||||
def _real_initialize(self):
|
||||
if self.is_logged_in or not self._get_cookies('https://twitter.com/').get('auth_token'):
|
||||
return
|
||||
# User has not yet been migrated to x.com and has passed twitter.com cookies
|
||||
TwitterBaseIE._API_BASE = 'https://api.twitter.com/1.1/'
|
||||
TwitterBaseIE._GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
|
||||
|
||||
@functools.cached_property
|
||||
def _selected_api(self):
|
||||
return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0]
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -72,9 +71,9 @@ class VidyardBaseIE(InfoExtractor):
|
||||
'id': ('facadeUuid', {str}),
|
||||
'display_id': ('videoId', {int}, {str_or_none}),
|
||||
'title': ('name', {str}),
|
||||
'description': ('description', {str}, {unescapeHTML}, {lambda x: x or None}),
|
||||
'description': ('description', {str}, {unescapeHTML}, filter),
|
||||
'duration': ((
|
||||
('milliseconds', {functools.partial(float_or_none, scale=1000)}),
|
||||
('milliseconds', {float_or_none(scale=1000)}),
|
||||
('seconds', {int_or_none})), any),
|
||||
'thumbnails': ('thumbnailUrls', ('small', 'normal'), {'url': {url_or_none}}),
|
||||
'tags': ('tags', ..., 'name', {str}),
|
||||
|
||||
@@ -869,11 +869,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
for retry in (False, True):
|
||||
try:
|
||||
video = self._call_videos_api(video_id, viewer['jwt'], unlisted_hash)
|
||||
break
|
||||
except ExtractorError as e:
|
||||
if (not retry and isinstance(e.cause, HTTPError) and e.cause.status == 400
|
||||
and 'password' in traverse_obj(
|
||||
e.cause.response.read(),
|
||||
({bytes.decode}, {json.loads}, 'invalid_parameters', ..., 'field'),
|
||||
self._webpage_read_content(e.cause.response, e.cause.response.url, video_id, fatal=False),
|
||||
({json.loads}, 'invalid_parameters', ..., 'field'),
|
||||
)):
|
||||
self._verify_video_password(
|
||||
video_id, self._get_video_password(), viewer['xsrft'])
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import functools
|
||||
import json
|
||||
import time
|
||||
import urllib.parse
|
||||
@@ -171,7 +170,7 @@ class VRTIE(VRTBaseIE):
|
||||
**traverse_obj(data, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('shortDescription', {str}),
|
||||
'duration': ('duration', {functools.partial(float_or_none, scale=1000)}),
|
||||
'duration': ('duration', {float_or_none(scale=1000)}),
|
||||
'thumbnail': ('posterImageUrl', {url_or_none}),
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -67,7 +67,7 @@ class WeiboBaseIE(InfoExtractor):
|
||||
'format': ('quality_desc', {str}),
|
||||
'format_id': ('label', {str}),
|
||||
'ext': ('mime', {mimetype2ext}),
|
||||
'tbr': ('bitrate', {int_or_none}, {lambda x: x or None}),
|
||||
'tbr': ('bitrate', {int_or_none}, filter),
|
||||
'vcodec': ('video_codecs', {str}),
|
||||
'fps': ('fps', {int_or_none}),
|
||||
'width': ('width', {int_or_none}),
|
||||
@@ -107,14 +107,14 @@ class WeiboBaseIE(InfoExtractor):
|
||||
**traverse_obj(video_info, {
|
||||
'id': (('id', 'id_str', 'mid'), {str_or_none}),
|
||||
'display_id': ('mblogid', {str_or_none}),
|
||||
'title': ('page_info', 'media_info', ('video_title', 'kol_title', 'name'), {str}, {lambda x: x or None}),
|
||||
'title': ('page_info', 'media_info', ('video_title', 'kol_title', 'name'), {str}, filter),
|
||||
'description': ('text_raw', {str}),
|
||||
'duration': ('page_info', 'media_info', 'duration', {int_or_none}),
|
||||
'timestamp': ('page_info', 'media_info', 'video_publish_time', {int_or_none}),
|
||||
'thumbnail': ('page_info', 'page_pic', {url_or_none}),
|
||||
'uploader': ('user', 'screen_name', {str}),
|
||||
'uploader_id': ('user', ('id', 'id_str'), {str_or_none}),
|
||||
'uploader_url': ('user', 'profile_url', {lambda x: urljoin('https://weibo.com/', x)}),
|
||||
'uploader_url': ('user', 'profile_url', {urljoin('https://weibo.com/')}),
|
||||
'view_count': ('page_info', 'media_info', 'online_users_number', {int_or_none}),
|
||||
'like_count': ('attitudes_count', {int_or_none}),
|
||||
'repost_count': ('reposts_count', {int_or_none}),
|
||||
|
||||
@@ -159,8 +159,8 @@ class WeverseBaseIE(InfoExtractor):
|
||||
'creators': ('community', 'communityName', {str}, all),
|
||||
'channel_id': (('community', 'author'), 'communityId', {str_or_none}),
|
||||
'duration': ('extension', 'video', 'playTime', {float_or_none}),
|
||||
'timestamp': ('publishedAt', {lambda x: int_or_none(x, 1000)}),
|
||||
'release_timestamp': ('extension', 'video', 'onAirStartAt', {lambda x: int_or_none(x, 1000)}),
|
||||
'timestamp': ('publishedAt', {int_or_none(scale=1000)}),
|
||||
'release_timestamp': ('extension', 'video', 'onAirStartAt', {int_or_none(scale=1000)}),
|
||||
'thumbnail': ('extension', (('mediaInfo', 'thumbnail', 'url'), ('video', 'thumb')), {url_or_none}),
|
||||
'view_count': ('extension', 'video', 'playCount', {int_or_none}),
|
||||
'like_count': ('extension', 'video', 'likeCount', {int_or_none}),
|
||||
@@ -469,7 +469,7 @@ class WeverseMomentIE(WeverseBaseIE):
|
||||
'creator': (('community', 'author'), 'communityName', {str}),
|
||||
'channel_id': (('community', 'author'), 'communityId', {str_or_none}),
|
||||
'duration': ('extension', 'moment', 'video', 'uploadInfo', 'playTime', {float_or_none}),
|
||||
'timestamp': ('publishedAt', {lambda x: int_or_none(x, 1000)}),
|
||||
'timestamp': ('publishedAt', {int_or_none(scale=1000)}),
|
||||
'thumbnail': ('extension', 'moment', 'video', 'uploadInfo', 'imageUrl', {url_or_none}),
|
||||
'like_count': ('emotionCount', {int_or_none}),
|
||||
'comment_count': ('commentCount', {int_or_none}),
|
||||
|
||||
@@ -78,7 +78,7 @@ class WeVidiIE(InfoExtractor):
|
||||
}
|
||||
|
||||
src_path = f'{wvplayer_props["srcVID"]}/{wvplayer_props["srcUID"]}/{wvplayer_props["srcNAME"]}'
|
||||
for res in traverse_obj(wvplayer_props, ('resolutions', ..., {int}, {lambda x: x or None})):
|
||||
for res in traverse_obj(wvplayer_props, ('resolutions', ..., {int}, filter)):
|
||||
format_id = str(-(res // -2) - 1)
|
||||
yield {
|
||||
'acodec': 'mp4a.40.2',
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -51,7 +50,7 @@ class XiaoHongShuIE(InfoExtractor):
|
||||
'tbr': ('avgBitrate', {int_or_none}),
|
||||
'format': ('qualityType', {str}),
|
||||
'filesize': ('size', {int_or_none}),
|
||||
'duration': ('duration', {functools.partial(float_or_none, scale=1000)}),
|
||||
'duration': ('duration', {float_or_none(scale=1000)}),
|
||||
})
|
||||
|
||||
formats.extend(traverse_obj(info, (('mediaUrl', ('backupUrls', ...)), {
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
from .common import InfoExtractor
|
||||
from .kaltura import KalturaIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
traverse_obj,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class YleAreenaIE(InfoExtractor):
|
||||
@@ -15,9 +16,9 @@ class YleAreenaIE(InfoExtractor):
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://areena.yle.fi/1-4371942',
|
||||
'md5': '932edda0ecf5dfd6423804182d32f8ac',
|
||||
'md5': 'd87e9a1e74e67e009990ddd413e426b4',
|
||||
'info_dict': {
|
||||
'id': '0_a3tjk92c',
|
||||
'id': '1-4371942',
|
||||
'ext': 'mp4',
|
||||
'title': 'Pouchit',
|
||||
'description': 'md5:01071d7056ceec375f63960f90c35366',
|
||||
@@ -26,37 +27,27 @@ class YleAreenaIE(InfoExtractor):
|
||||
'season_number': 1,
|
||||
'episode': 'Episode 2',
|
||||
'episode_number': 2,
|
||||
'thumbnail': 'http://cfvod.kaltura.com/p/1955031/sp/195503100/thumbnail/entry_id/0_a3tjk92c/version/100061',
|
||||
'uploader_id': 'ovp@yle.fi',
|
||||
'duration': 1435,
|
||||
'view_count': int,
|
||||
'upload_date': '20181204',
|
||||
'release_date': '20190106',
|
||||
'timestamp': 1543916210,
|
||||
'subtitles': {'fin': [{'url': r're:^https?://', 'ext': 'srt'}]},
|
||||
'thumbnail': r're:https://images\.cdn\.yle\.fi/image/upload/.+\.jpg',
|
||||
'age_limit': 7,
|
||||
'webpage_url': 'https://areena.yle.fi/1-4371942',
|
||||
'release_date': '20190105',
|
||||
'release_timestamp': 1546725660,
|
||||
'duration': 1435,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://areena.yle.fi/1-2158940',
|
||||
'md5': 'cecb603661004e36af8c5188b5212b12',
|
||||
'md5': '6369ddc5e07b5fdaeda27a495184143c',
|
||||
'info_dict': {
|
||||
'id': '1_l38iz9ur',
|
||||
'id': '1-2158940',
|
||||
'ext': 'mp4',
|
||||
'title': 'Albi haluaa vessan',
|
||||
'description': 'md5:15236d810c837bed861fae0e88663c33',
|
||||
'description': 'Albi haluaa vessan.',
|
||||
'series': 'Albi Lumiukko',
|
||||
'thumbnail': 'http://cfvod.kaltura.com/p/1955031/sp/195503100/thumbnail/entry_id/1_l38iz9ur/version/100021',
|
||||
'uploader_id': 'ovp@yle.fi',
|
||||
'duration': 319,
|
||||
'view_count': int,
|
||||
'upload_date': '20211202',
|
||||
'release_date': '20211215',
|
||||
'timestamp': 1638448202,
|
||||
'subtitles': {},
|
||||
'thumbnail': r're:https://images\.cdn\.yle\.fi/image/upload/.+\.jpg',
|
||||
'age_limit': 0,
|
||||
'webpage_url': 'https://areena.yle.fi/1-2158940',
|
||||
'release_date': '20211215',
|
||||
'release_timestamp': 1639555200,
|
||||
'duration': 319,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -67,72 +58,125 @@ class YleAreenaIE(InfoExtractor):
|
||||
'title': 'HKO & Mälkki & Tanner',
|
||||
'description': 'md5:b4f1b1af2c6569b33f75179a86eea156',
|
||||
'series': 'Helsingin kaupunginorkesterin konsertteja',
|
||||
'thumbnail': r're:^https?://.+\.jpg$',
|
||||
'thumbnail': r're:https://images\.cdn\.yle\.fi/image/upload/.+\.jpg',
|
||||
'release_date': '20230120',
|
||||
'release_timestamp': 1674242079,
|
||||
'duration': 8004,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://areena.yle.fi/1-72251830',
|
||||
'info_dict': {
|
||||
'id': '1-72251830',
|
||||
'ext': 'mp4',
|
||||
'title': r're:Pentulive 2024 | Pentulive \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
|
||||
'description': 'md5:1f118707d9093bf894a34fbbc865397b',
|
||||
'series': 'Pentulive',
|
||||
'thumbnail': r're:https://images\.cdn\.yle\.fi/image/upload/.+\.jpg',
|
||||
'live_status': 'is_live',
|
||||
'release_date': '20241025',
|
||||
'release_timestamp': 1729875600,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'livestream',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://areena.yle.fi/podcastit/1-71022852',
|
||||
'info_dict': {
|
||||
'id': '1-71022852',
|
||||
'ext': 'mp3',
|
||||
'title': 'Värityspäivä',
|
||||
'description': 'md5:c3a02b0455ec71d32cbe09d32ec161e2',
|
||||
'series': 'Murun ja Paukun ikioma kaupunki',
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
'release_date': '20240607',
|
||||
'release_timestamp': 1717736400,
|
||||
'duration': 442,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, is_podcast = self._match_valid_url(url).group('id', 'podcast')
|
||||
info = self._search_json_ld(self._download_webpage(url, video_id), video_id, default={})
|
||||
json_ld = self._search_json_ld(self._download_webpage(url, video_id), video_id, default={})
|
||||
video_data = self._download_json(
|
||||
f'https://player.api.yle.fi/v1/preview/{video_id}.json?app_id=player_static_prod&app_key=8930d72170e48303cf5f3867780d549b',
|
||||
video_id, headers={
|
||||
'origin': 'https://areena.yle.fi',
|
||||
'referer': 'https://areena.yle.fi/',
|
||||
'content-type': 'application/json',
|
||||
})
|
||||
})['data']
|
||||
|
||||
# Example title: 'K1, J2: Pouchit | Modernit miehet'
|
||||
season_number, episode_number, episode, series = self._search_regex(
|
||||
r'K(?P<season_no>\d+),\s*J(?P<episode_no>\d+):?\s*\b(?P<episode>[^|]+)\s*|\s*(?P<series>.+)',
|
||||
info.get('title') or '', 'episode metadata', group=('season_no', 'episode_no', 'episode', 'series'),
|
||||
json_ld.get('title') or '', 'episode metadata', group=('season_no', 'episode_no', 'episode', 'series'),
|
||||
default=(None, None, None, None))
|
||||
description = traverse_obj(video_data, ('data', 'ongoing_ondemand', 'description', 'fin'), expected_type=str)
|
||||
description = traverse_obj(video_data, ('ongoing_ondemand', 'description', 'fin', {str}))
|
||||
|
||||
subtitles = {}
|
||||
for sub in traverse_obj(video_data, ('data', 'ongoing_ondemand', 'subtitles', ...)):
|
||||
if url_or_none(sub.get('uri')):
|
||||
subtitles.setdefault(sub.get('language') or 'und', []).append({
|
||||
'url': sub['uri'],
|
||||
'ext': 'srt',
|
||||
'name': sub.get('kind'),
|
||||
})
|
||||
for sub in traverse_obj(video_data, ('ongoing_ondemand', 'subtitles', lambda _, v: url_or_none(v['uri']))):
|
||||
subtitles.setdefault(sub.get('language') or 'und', []).append({
|
||||
'url': sub['uri'],
|
||||
'ext': 'srt',
|
||||
'name': sub.get('kind'),
|
||||
})
|
||||
|
||||
if is_podcast:
|
||||
info_dict = {
|
||||
'url': video_data['data']['ongoing_ondemand']['media_url'],
|
||||
}
|
||||
elif kaltura_id := traverse_obj(video_data, ('data', 'ongoing_ondemand', 'kaltura', 'id', {str})):
|
||||
info_dict = {
|
||||
info_dict, metadata = {}, {}
|
||||
if is_podcast and traverse_obj(video_data, ('ongoing_ondemand', 'media_url', {url_or_none})):
|
||||
metadata = video_data['ongoing_ondemand']
|
||||
info_dict['url'] = metadata['media_url']
|
||||
elif traverse_obj(video_data, ('ongoing_event', 'manifest_url', {url_or_none})):
|
||||
metadata = video_data['ongoing_event']
|
||||
metadata.pop('duration', None) # Duration is not accurate for livestreams
|
||||
info_dict['live_status'] = 'is_live'
|
||||
elif traverse_obj(video_data, ('ongoing_ondemand', 'manifest_url', {url_or_none})):
|
||||
metadata = video_data['ongoing_ondemand']
|
||||
# XXX: Has all externally-hosted Kaltura content been moved to native hosting?
|
||||
elif kaltura_id := traverse_obj(video_data, ('ongoing_ondemand', 'kaltura', 'id', {str})):
|
||||
metadata = video_data['ongoing_ondemand']
|
||||
info_dict.update({
|
||||
'_type': 'url_transparent',
|
||||
'url': smuggle_url(f'kaltura:1955031:{kaltura_id}', {'source_url': url}),
|
||||
'ie_key': KalturaIE.ie_key(),
|
||||
}
|
||||
})
|
||||
elif traverse_obj(video_data, ('gone', {dict})):
|
||||
self.raise_no_formats('The content is no longer available', expected=True, video_id=video_id)
|
||||
metadata = video_data['gone']
|
||||
else:
|
||||
formats, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
video_data['data']['ongoing_ondemand']['manifest_url'], video_id, 'mp4', m3u8_id='hls')
|
||||
raise ExtractorError('Unable to extract content')
|
||||
|
||||
if not info_dict.get('url') and metadata.get('manifest_url'):
|
||||
info_dict['formats'], subs = self._extract_m3u8_formats_and_subtitles(
|
||||
metadata['manifest_url'], video_id, 'mp4', m3u8_id='hls')
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
info_dict = {'formats': formats}
|
||||
|
||||
return {
|
||||
**info_dict,
|
||||
**traverse_obj(json_ld, {
|
||||
'title': 'title',
|
||||
'thumbnails': ('thumbnails', ..., {'url': 'url'}),
|
||||
}),
|
||||
'id': video_id,
|
||||
'title': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'title', 'fin'), expected_type=str)
|
||||
or episode or info.get('title')),
|
||||
'title': episode,
|
||||
'description': description,
|
||||
'series': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'series', 'title', 'fin'), expected_type=str)
|
||||
or series),
|
||||
'series': series,
|
||||
'season_number': (int_or_none(self._search_regex(r'Kausi (\d+)', description, 'season number', default=None))
|
||||
or int_or_none(season_number)),
|
||||
'episode_number': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'episode_number'), expected_type=int_or_none)
|
||||
or int_or_none(episode_number)),
|
||||
'thumbnails': traverse_obj(info, ('thumbnails', ..., {'url': 'url'})),
|
||||
'age_limit': traverse_obj(video_data, ('data', 'ongoing_ondemand', 'content_rating', 'age_restriction'), expected_type=int_or_none),
|
||||
'episode_number': int_or_none(episode_number),
|
||||
'subtitles': subtitles or None,
|
||||
'release_date': unified_strdate(traverse_obj(video_data, ('data', 'ongoing_ondemand', 'start_time'), expected_type=str)),
|
||||
**traverse_obj(metadata, {
|
||||
'title': ('title', 'fin', {str}),
|
||||
'description': ('description', 'fin', {str}),
|
||||
'series': ('series', 'title', 'fin', {str}),
|
||||
'episode_number': ('episode_number', {int_or_none}),
|
||||
'age_limit': ('content_rating', 'age_restriction', {int_or_none}),
|
||||
'release_timestamp': ('start_time', {parse_iso8601}),
|
||||
'duration': ('duration', 'duration_in_seconds', {int_or_none}),
|
||||
}),
|
||||
**info_dict,
|
||||
}
|
||||
|
||||
@@ -247,7 +247,7 @@ class YouPornListBase(InfoExtractor):
|
||||
if not html:
|
||||
return
|
||||
for element in get_elements_html_by_class('video-title', html):
|
||||
if video_url := traverse_obj(element, ({extract_attributes}, 'href', {lambda x: urljoin(url, x)})):
|
||||
if video_url := traverse_obj(element, ({extract_attributes}, 'href', {urljoin(url)})):
|
||||
yield self.url_result(video_url)
|
||||
|
||||
if page_num is not None:
|
||||
|
||||
@@ -644,13 +644,14 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE] = {}
|
||||
|
||||
if refresh_token:
|
||||
refresh_token = refresh_token.strip('\'') or None
|
||||
|
||||
# Allow refresh token passed to initialize cache
|
||||
if refresh_token:
|
||||
msg = f'{self._OAUTH_DISPLAY_ID}: Using password input as refresh token'
|
||||
if self.get_param('cachedir') is not False:
|
||||
msg += ' and caching token to disk; you should supply an empty password next time'
|
||||
self.to_screen(msg)
|
||||
self.cache.store(self._NETRC_MACHINE, self._oauth_cache_key, refresh_token)
|
||||
else:
|
||||
refresh_token = self.cache.load(self._NETRC_MACHINE, self._oauth_cache_key)
|
||||
|
||||
refresh_token = refresh_token or self.cache.load(self._NETRC_MACHINE, self._oauth_cache_key)
|
||||
if refresh_token:
|
||||
YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE]['refresh_token'] = refresh_token
|
||||
try:
|
||||
@@ -3610,7 +3611,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'frameworkUpdates', 'entityBatchUpdate', 'mutations',
|
||||
lambda _, v: v['payload']['macroMarkersListEntity']['markersList']['markerType'] == 'MARKER_TYPE_HEATMAP',
|
||||
'payload', 'macroMarkersListEntity', 'markersList', 'markers', ..., {
|
||||
'start_time': ('startMillis', {functools.partial(float_or_none, scale=1000)}),
|
||||
'start_time': ('startMillis', {float_or_none(scale=1000)}),
|
||||
'end_time': {lambda x: (int(x['startMillis']) + int(x['durationMillis'])) / 1000},
|
||||
'value': ('intensityScoreNormalized', {float_or_none}),
|
||||
})) or None
|
||||
@@ -3636,7 +3637,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'author_is_verified': ('author', 'isVerified', {bool}),
|
||||
'author_url': ('author', 'channelCommand', 'innertubeCommand', (
|
||||
('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url'),
|
||||
), {lambda x: urljoin('https://www.youtube.com', x)}),
|
||||
), {urljoin('https://www.youtube.com')}),
|
||||
}, get_all=False),
|
||||
'is_favorited': (None if toolbar_entity_payload is None else
|
||||
toolbar_entity_payload.get('heartState') == 'TOOLBAR_HEART_STATE_HEARTED'),
|
||||
@@ -4303,7 +4304,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
continue
|
||||
|
||||
tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
|
||||
format_duration = traverse_obj(fmt, ('approxDurationMs', {lambda x: float_or_none(x, 1000)}))
|
||||
format_duration = traverse_obj(fmt, ('approxDurationMs', {float_or_none(scale=1000)}))
|
||||
# Some formats may have much smaller duration than others (possibly damaged during encoding)
|
||||
# E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
|
||||
# Make sure to avoid false positives with small duration differences.
|
||||
@@ -4776,7 +4777,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'live_status': live_status,
|
||||
'release_timestamp': live_start_time,
|
||||
'_format_sort_fields': ( # source_preference is lower for potentially damaged formats
|
||||
'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'),
|
||||
'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec', 'channels', 'acodec', 'lang', 'proto'),
|
||||
}
|
||||
|
||||
subtitles = {}
|
||||
@@ -7857,7 +7858,7 @@ class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
|
||||
'section_start': int(clip_data['startTimeMs']) / 1000,
|
||||
'section_end': int(clip_data['endTimeMs']) / 1000,
|
||||
'_format_sort_fields': ( # https protocol is prioritized for ffmpeg compatibility
|
||||
'proto:https', 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang'),
|
||||
'proto:https', 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec', 'channels', 'acodec', 'lang'),
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -109,7 +109,7 @@ class ZaikoIE(ZaikoBaseIE):
|
||||
'uploader': ('profile', 'name', {str}),
|
||||
'uploader_id': ('profile', 'id', {str_or_none}),
|
||||
'release_timestamp': ('stream', 'start', 'timestamp', {int_or_none}),
|
||||
'categories': ('event', 'genres', ..., {lambda x: x or None}),
|
||||
'categories': ('event', 'genres', ..., filter),
|
||||
}),
|
||||
'alt_title': traverse_obj(initial_event_info, ('title', {str})),
|
||||
'thumbnails': [{'url': url, 'id': url_basename(url)} for url in thumbnail_urls if url_or_none(url)],
|
||||
|
||||
Reference in New Issue
Block a user