[ie/educast] Address PR review comments
- Removed local imports and used module-level ones
- Switched to traverse_obj-based metadata unpacking
- Avoided overwriting resolution in formats; removed duration from format dicts
- Removed redundant max_downloads logic (handled by core)
- Replaced to_screen with report_warning for error in data_json
- Added expected_warnings for the third EducastIE test
- Handled missing content-disposition header

Co-authored-by: Filipe Resendes <filipe.resendes@tecnico.ulisboa.pt>
parent b7d54b33e9
commit 4978e987a1
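
For context on the traverse_obj-based unpacking mentioned in the message: instead of wrapping every lookup in str_or_none(), the metadata dict is built from one mapping of output keys to traversal paths, where a trailing {str} (or any callable) coerces or filters the value and missing paths are dropped. A minimal standalone sketch of the pattern, using yt-dlp's own helper; the sample data_json dict below is made up, not Educast's real payload:

    from yt_dlp.utils import traverse_obj

    data_json = {'clip': {'name': 'Example clip'}, 'author': 'FCCN', 'subtitle': None}
    info = {
        'id': 'abc123',
        **traverse_obj(data_json, {
            'title': ('clip', 'name', {str}),   # nested path, kept only if it yields a str
            'uploader': ('author', {str}),
            'alt_title': ('subtitle', {str}),   # None/missing values are omitted entirely
        }),
    }
    # info == {'id': 'abc123', 'title': 'Example clip', 'uploader': 'FCCN'}

Because keys whose path resolves to nothing are left out rather than set to None, the per-field str_or_none() guards in the old code become unnecessary, which is what the diff below does for the Educast metadata.
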
@@ -1,21 +1,44 @@
-from .common import InfoExtractor
+import re
+from urllib import parse
+
+from .common import (
+    ExtractorError,
+    InfoExtractor,
+)
 from ..networking import HEADRequest
 from ..utils import (
-    float_or_none,
-    int_or_none,
     mimetype2ext,
-    str_or_none,
     traverse_obj,
     unified_timestamp,
 )


-class EducastIE(InfoExtractor):
-    _VALID_URL = r'https?://(www)?educast\.fccn\.pt/vod/clips/(?P<id>[a-zA-Z0-9]+)'
+class EducastBaseIE(InfoExtractor):
     _API_BASE = 'https://educast.fccn.pt'

-    _TESTS = [
-        {
+    @staticmethod
+    def _paginate_and_collect(get_page_func, parse_func):
+        videos = []
+        page = 1
+        while True:
+            webpage = get_page_func(page)
+            if not webpage:
+                break
+            new_videos = parse_func(webpage)
+            found = False
+            for v in new_videos:
+                if not any(existing['id'] == v['id'] for existing in videos):
+                    videos.append(v)
+                    found = True
+            if not found:
+                break
+            page += 1
+        return videos
+
+
+class EducastIE(EducastBaseIE):
+    _VALID_URL = r'https?://(www)?educast\.fccn\.pt/vod/clips/(?P<id>[a-zA-Z0-9]+)'
+    _TESTS = [{
         'note': 'test for public Educast video downloading the merged format',
         'url': 'https://educast.fccn.pt/vod/clips/2o06o2c6hm/streaming.html',
         'md5': '264b3e2f0c6c5d3c8e1a86e57f21d0bc',
@@ -33,54 +56,9 @@ class EducastIE(InfoExtractor):
             'timestamp': 1410946740,
             'upload_date': '20140917',
             'license': 'http://creativecommons.org/licenses/by-nc-nd/2.5/pt/',
-            'formats': [
-                {
-                    'format_id': 'presenter-0',
-                    'ext': 'm4a',
-                    'vcodec': 'none',
-                    'acodec': 'mp4a.40.2',
-                    'protocol': 'http_dash_segments',
+            'duration': 1041,
         },
-                {
-                    'format_id': 'presenter-1',
-                    'ext': 'mp4',
-                    'vcodec': 'avc1.77.40',
-                    'acodec': 'mp4a.40.2',
-                    'protocol': 'm3u8_native',
-                },
-                {
-                    'format_id': 'presenter-2',
-                    'ext': 'mp4',
-                    'vcodec': 'avc1.4d4028',
-                    'acodec': 'none',
-                    'protocol': 'http_dash_segments',
-                    'fps': 25,
-                },
-                {
-                    'format_id': 'presentation-0',
-                    'ext': 'mp4',
-                    'vcodec': 'avc1.77.40',
-                    'acodec': 'none',
-                    'protocol': 'm3u8_native',
-                },
-                {
-                    'format_id': 'presentation-1',
-                    'ext': 'mp4',
-                    'vcodec': 'avc1.4d4028',
-                    'acodec': 'none',
-                    'protocol': 'http_dash_segments',
-                    'fps': 25,
-                },
-                {
-                    'format_id': 'merged',
-                    'ext': 'mp4',
-                    'protocol': 'https',
-                    'format_note': 'single stream, may be lower res',
-                },
-            ],
-        },
-    },
-    {
+    }, {
         'note': 'test for private Educast video downloading the merged format',
         'url': 'https://educast.fccn.pt/vod/clips/jhwehqk9/streaming.html',
         'md5': '242a4a8d1a84a4c3aab93771c3da244e',
@@ -96,40 +74,10 @@ class EducastIE(InfoExtractor):
             'thumbnail': 'https://educast.fccn.pt/img/clips/jhwehqk9/delivery/cover',
             'categories': ['Ciências Naturais e Matemática', 'Universidade de Lisboa'],
             'license': 'http://creativecommons.org/licenses/by/4.0/',
-            'formats': [
-                {
-                    'format_id': 'presenter-0',
-                    'ext': 'm4a',
-                    'vcodec': 'none',
-                    'acodec': 'mp4a.40.2',
-                    'protocol': 'http_dash_segments',
-                },
-                {
-                    'format_id': 'presenter-1',
-                    'ext': 'mp4',
-                    'vcodec': 'avc1.77.40',
-                    'acodec': 'mp4a.40.2',
-                    'protocol': 'm3u8_native',
-                },
-                {
-                    'format_id': 'presenter-2',
-                    'ext': 'mp4',
-                    'vcodec': 'avc1.4d4028',
-                    'acodec': 'none',
-                    'protocol': 'http_dash_segments',
-                    'fps': 25,
-                },
-                {
-                    'format_id': 'merged',
-                    'ext': 'mp4',
-                    'protocol': 'https',
-                    'format_note': 'single stream, may be lower res',
-                },
-            ],
+            'duration': 2756,
         },
         'skip': 'This video is private and requires authentication to access',
-    },
-    {
+    }, {
         'note': 'test for deprecated streaming url, should rely on fallback',
         'url': 'https://educast.fccn.pt/vod/clips/2by2fw4fkx/streaming.html',
         'md5': '88055700118db7411d1cc0da48ca1747',
@@ -138,9 +86,9 @@ class EducastIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Teoria 3A. Sistemas de Equaces Diferenciais Lineares de Primeira Ordem_',
         },
+        'expected_warnings': ['Este vídeo não está preparado para HTML5'],
         'skip': 'This video is private and requires authentication to access',
-    },
-    ]
+    }]

     def parse_timestamp(self, timestamp_str):
         if isinstance(timestamp_str, str) and '.' in timestamp_str:
@@ -163,47 +111,48 @@ def _extract_video_formats(self, video_json, video_id):
             formats += self._extract_m3u8_formats(hls_url, video_id, ext='mp4', entry_protocol='m3u8_native', fatal=False)

         for f in formats:
-            f['format_id'] = str_or_none(video_json.get('role'))
-            f['width'] = int_or_none(video_json.get('width'))
-            f['height'] = int_or_none(video_json.get('height'))
-            f['duration'] = float_or_none(video_json.get('duration'))
-            f['filesize_approx'] = int_or_none(float_or_none(f.get('duration')) * float_or_none(f.get('tbr')) * 1000 / 8)
+            f['format_id'] = video_json.get('role')

         return formats

     def _extract_from_json(self, video_id):
         data_json_url = f'https://educast.fccn.pt/vod/clips/{video_id}/video_player/data.json'
-        data_json = self._download_json(data_json_url, video_id, fatal=False)
-        if not data_json:
+        try:
+            data_json = self._download_json(data_json_url, video_id)
+        except ExtractorError as e:
+            self.report_warning(e)
             return None
         if data_json.get('error'):
-            self.to_screen(data_json.get('error'))
+            self.report_warning(data_json.get('error'))
             return None

         formats = []
         info = {
             'id': video_id,
-            'title': str_or_none(traverse_obj(data_json, ('clip', 'name'))),
             'formats': formats,
-            'alt_title': str_or_none(data_json.get('subtitle')),
-            'description': str_or_none(data_json.get('clipDescription')),
-            'uploader': str_or_none(data_json.get('author')),
-            'timestamp': self.parse_timestamp(data_json.get('timestamp')),
-            'thumbnail': str_or_none(data_json.get('cover')),
-            'license': str_or_none(data_json.get('licenceURL')),
-            'webpage_url': str_or_none(data_json.get('url')),
-            'channel': str_or_none(traverse_obj(data_json, ('channel', 'name'))),
-            'channel_url': str_or_none(traverse_obj(data_json, ('channel', 'url'))),
+            **traverse_obj(data_json, {
+                'title': ('clip', 'name', {str}),
+                'alt_title': ('subtitle', {str}),
+                'description': ('clipDescription', {str}),
+                'uploader': ('author', {str}),
+                'timestamp': ('timestamp', {self.parse_timestamp}),
+                'thumbnail': ('cover', {str}),
+                'license': ('licenceURL', {str}),
+                'webpage_url': ('url', {str}),
+                'channel': ('channel', 'name', {str}),
+                'channel_url': ('channel', 'url', {str}),
+                'duration': ('videos', 0, 'duration', {int}),
+            }),
             'categories': [cat for cat in (
-                str_or_none(traverse_obj(data_json, ('area', 'name'))),
-                str_or_none(traverse_obj(data_json, ('institution', 'name'))),
+                traverse_obj(data_json, ('area', 'name'), expected_type=str),
+                traverse_obj(data_json, ('institution', 'name'), expected_type=str),
             ) if cat],
         }

         for video_json in data_json.get('videos') or []:
             formats.extend(self._extract_video_formats(video_json, video_id))

-        download_url = str_or_none(data_json.get('downloadURL'))
+        download_url = data_json.get('downloadURL')
         if download_url:
             formats.append({
                 'format_id': 'merged',
@@ -215,8 +164,6 @@ def _extract_from_json(self, video_id):
         return info

     def _try_fallback(self, url, video_id):
-        import re
-
         # Last resort for videos with no working streaming option
         KNOWN_BASENAMES = ['desktop.mp4', 'ipod.m4v', 'quicktime.mov']
         for basename in KNOWN_BASENAMES:
@@ -231,7 +178,9 @@ def _try_fallback(self, url, video_id):
             if ext not in ('mp4', 'm4v', 'mov'):
                 continue
             title = None
-            m = re.search(r'filename\s*=\s*"([^"]+)"', response.get_header('content-disposition'), re.IGNORECASE)
+            ext_header = response.get_header('content-disposition')
+            if ext_header:
+                m = re.search(r'filename\s*=\s*"([^"]+)"', ext_header, re.IGNORECASE)
                 if m:
                     title = m.group(1).strip().removesuffix(f'.{ext}')
             return {
@@ -244,35 +193,11 @@ def _real_extract(self, url):
         video_id = self._match_id(url)
         return self._extract_from_json(video_id) or self._try_fallback(url, video_id)

-    @staticmethod
-    def _paginate_and_collect(get_page_func, parse_func, max_videos=None):
-        videos = []
-        page = 1
-        while True:
-            if max_videos is not None and len(videos) >= max_videos:
-                break
-            webpage = get_page_func(page)
-            if not webpage:
-                break
-            new_videos = parse_func(webpage)
-            found = False
-            for v in new_videos:
-                if not any(existing['id'] == v['id'] for existing in videos):
-                    videos.append(v)
-                    found = True
-                    if max_videos is not None and len(videos) >= max_videos:
-                        break
-            if not found or (max_videos is not None and len(videos) >= max_videos):
-                break
-            page += 1
-        return videos
-

-class EducastChannelIE(InfoExtractor):
+class EducastChannelIE(EducastBaseIE):
     IE_NAME = 'educast:channel'
     _VALID_URL = r'https?://(?:www\.)?educast\.fccn\.pt/vod/channels/(?P<id>[a-zA-Z0-9]+)/?(?:$|[?#])'
-    _TESTS = [
-        {
+    _TESTS = [{
         'note': 'test for private Educast Channel',
         'url': 'https://educast.fccn.pt/vod/channels/2o0eonmrak',
         'info_dict':
@@ -282,8 +207,7 @@ class EducastChannelIE(InfoExtractor):
             'description': str,
         },
         'playlist_mincount': 26,
-    },
-    {
+    }, {
         'note': 'test for private Educast Channel',
         'url': 'https://educast.fccn.pt/vod/channels/2fudccnyj7',
         'info_dict': {
@@ -293,11 +217,9 @@ class EducastChannelIE(InfoExtractor):
         },
         'playlist_mincount': 26,
         'skip': 'This channel is private and requires authentication to access',
-    },
-    ]
+    }]

     def _extract_video_links_from_html(self, webpage, ie_key):
-        import re
         videos_by_id = {}
         pattern = r'href="https://educast\.fccn\.pt/vod/clips/(?P<id>[a-zA-Z0-9]+)/(?P<option>[^?"/]+)'
         for m in re.finditer(pattern, webpage or '', re.IGNORECASE):
@@ -321,17 +243,12 @@ def _extract_video_links_from_html(self, webpage, ie_key):
         return videos

     def _extract_videos(self, url, channel_id, webpage=None):
-        max_downloads = None
-        if hasattr(self, '_downloader') and self._downloader:
-            max_downloads = self._downloader.params.get('max_downloads')
-
         def get_page(page):
-            import urllib.parse
-            url_parts = list(urllib.parse.urlparse(url))
-            query = urllib.parse.parse_qs(url_parts[4])
+            url_parts = list(parse.urlparse(url))
+            query = parse.parse_qs(url_parts[4])
             query['page'] = [str(page)]
-            url_parts[4] = urllib.parse.urlencode(query, doseq=True)
-            page_url = urllib.parse.urlunparse(url_parts)
+            url_parts[4] = parse.urlencode(query, doseq=True)
+            page_url = parse.urlunparse(url_parts)

             return self._download_webpage(page_url, channel_id, note=f'Downloading page {page}', fatal=False)

@@ -339,7 +256,7 @@ def parse_func(page_result):
             return self._extract_video_links_from_html(page_result, EducastIE.ie_key())

         try:
-            videos = EducastIE._paginate_and_collect(get_page, parse_func, max_videos=max_downloads)
+            videos = EducastIE._paginate_and_collect(get_page, parse_func)
             if videos:
                 return videos
         except Exception:
@@ -366,11 +283,10 @@ def _real_extract(self, url):
         }


-class EducastResultsIE(InfoExtractor):
+class EducastResultsIE(EducastBaseIE):
     IE_NAME = 'educast:results'
     _VALID_URL = r'https?://(?:www\.)?educast\.fccn\.pt/results\?(?P<params>(search|organization|category|channel)=[^#]+)'
-    _TESTS = [
-        {
+    _TESTS = [{
         'url': 'https://educast.fccn.pt/results?search=Sat%C3%A9lite',
         'info_dict': {
             'id': 'search=Sat%C3%A9lite',
@@ -378,38 +294,30 @@ class EducastResultsIE(InfoExtractor):
         },
         'playlist_mincount': 1,
         'params': {'max_downloads': 3},
-    },
-    {
+    }, {
         'url': 'https://educast.fccn.pt/results?organization=fccn.pt',
         'info_dict': {
             'id': 'organization=fccn.pt',
             'title': 'Results for organization=fccn.pt',
         },
         'playlist_mincount': 1,
-        'params': {'max_downloads': 3},
-    },
-    {
+    }, {
         'url': 'https://educast.fccn.pt/results?category=Technology%20&%20Applied%20sciences',
         'info_dict': {
             'id': 'category=Technology%20&%20Applied%20sciences',
             'title': 'Results for category=Technology%20&%20Applied%20sciences',
         },
         'playlist_mincount': 1,
-        'params': {'max_downloads': 3},
-    },
-    {
+    }, {
         'url': 'https://educast.fccn.pt/results?channel=16mfovn0pt',
         'info_dict': {
             'id': 'channel=16mfovn0pt',
             'title': 'Results for channel=16mfovn0pt',
         },
         'playlist_mincount': 1,
-        'params': {'max_downloads': 3},
-    },
-    ]
+    }]

     def _extract_video_links_from_html(self, webpage, ie_key):
-        import re
         videos = []
         for m in re.finditer(r'/vod/clips/([a-zA-Z0-9]+)/streaming.html', webpage or '', re.IGNORECASE):
             video_id = m.group(1)
@@ -424,28 +332,22 @@ def _extract_video_links_from_html(self, webpage, ie_key):
         return videos

     def _extract_videos(self, params, webpage=None):
-        import urllib.parse
-        max_downloads = None
-        if hasattr(self, '_downloader') and self._downloader:
-            max_downloads = self._downloader.params.get('max_downloads')
-
         def get_page(page):
             base_url = f'{EducastIE._API_BASE}/results?{params}'
-            url_parts = list(urllib.parse.urlparse(base_url))
-            query = urllib.parse.parse_qs(url_parts[4])
+            url_parts = list(parse.urlparse(base_url))
+            query = parse.parse_qs(url_parts[4])
             query['page'] = [str(page)]
-            url_parts[4] = urllib.parse.urlencode(query, doseq=True)
-            page_url = urllib.parse.urlunparse(url_parts)
+            url_parts[4] = parse.urlencode(query, doseq=True)
+            page_url = parse.urlunparse(url_parts)
             return self._download_webpage(page_url, params, note=f'Downloading results page {page}', fatal=False)

         def parse_func(webpage):
             return self._extract_video_links_from_html(webpage, EducastIE.ie_key())
-        return EducastIE._paginate_and_collect(get_page, parse_func, max_videos=max_downloads)
+        return EducastIE._paginate_and_collect(get_page, parse_func)

     def _real_extract(self, url):
-        import urllib.parse
         params = self._match_valid_url(url).group('params')
-        params_decoded = urllib.parse.unquote(params)
+        params_decoded = parse.unquote(params)
         webpage = self._download_webpage(url, params)
         return {
             '_type': 'playlist',