from .common import InfoExtractor from ..networking import HEADRequest from ..utils import ( float_or_none, int_or_none, mimetype2ext, str_or_none, traverse_obj, unified_timestamp, ) class EducastIE(InfoExtractor): _VALID_URL = r'https?://(www)?educast\.fccn\.pt/vod/clips/(?P[a-zA-Z0-9]+)' _API_BASE = 'https://educast.fccn.pt' _TESTS = [ { 'note': 'test for public Educast video downloading the merged format', 'url': 'https://educast.fccn.pt/vod/clips/2o06o2c6hm/streaming.html', 'md5': '264b3e2f0c6c5d3c8e1a86e57f21d0bc', 'info_dict': { 'id': '2o06o2c6hm', 'ext': 'mp4', 'title': 'Fundamentos de Bases de Dados', 'alt_title': '', 'description': '', 'uploader': 'Professor Luís Cavique', 'channel': 'UAB - Fundamentos de Base de dados', 'channel_url': 'https://educast.fccn.pt/results?channel=k06h42n0w', 'thumbnail': 'https://educast.fccn.pt/img/clips/2o06o2c6hm/delivery/cover', 'categories': ['Tecnologia e Ciências Aplicadas', 'FCCN'], 'timestamp': 1410946740, 'upload_date': '20140917', 'license': 'http://creativecommons.org/licenses/by-nc-nd/2.5/pt/', 'formats': [ { 'format_id': 'presenter-0', 'ext': 'm4a', 'vcodec': 'none', 'acodec': 'mp4a.40.2', 'protocol': 'http_dash_segments', }, { 'format_id': 'presenter-1', 'ext': 'mp4', 'vcodec': 'avc1.77.40', 'acodec': 'mp4a.40.2', 'protocol': 'm3u8_native', }, { 'format_id': 'presenter-2', 'ext': 'mp4', 'vcodec': 'avc1.4d4028', 'acodec': 'none', 'protocol': 'http_dash_segments', 'fps': 25, }, { 'format_id': 'presentation-0', 'ext': 'mp4', 'vcodec': 'avc1.77.40', 'acodec': 'none', 'protocol': 'm3u8_native', }, { 'format_id': 'presentation-1', 'ext': 'mp4', 'vcodec': 'avc1.4d4028', 'acodec': 'none', 'protocol': 'http_dash_segments', 'fps': 25, }, { 'format_id': 'merged', 'ext': 'mp4', 'protocol': 'https', 'format_note': 'single stream, may be lower res', }, ], }, }, { 'note': 'test for private Educast video downloading the merged format', 'url': 'https://educast.fccn.pt/vod/clips/jhwehqk9/streaming.html', 'md5': '242a4a8d1a84a4c3aab93771c3da244e', 'info_dict': { 'id': 'jhwehqk9', 'ext': 'mp4', 'title': ' Exercícios 8B. Equações Diferenciais Parciais', 'alt_title': '', 'description': '', 'uploader': ' Rui Miguel Saramago', 'channel': 'Cálculo Diferencial e Integral III - Aulas de Recuperação', 'channel_url': 'https://educast.fccn.pt/results?channel=2fudccnyj7', 'thumbnail': 'https://educast.fccn.pt/img/clips/jhwehqk9/delivery/cover', 'categories': ['Ciências Naturais e Matemática', 'Universidade de Lisboa'], 'license': 'http://creativecommons.org/licenses/by/4.0/', 'formats': [ { 'format_id': 'presenter-0', 'ext': 'm4a', 'vcodec': 'none', 'acodec': 'mp4a.40.2', 'protocol': 'http_dash_segments', }, { 'format_id': 'presenter-1', 'ext': 'mp4', 'vcodec': 'avc1.77.40', 'acodec': 'mp4a.40.2', 'protocol': 'm3u8_native', }, { 'format_id': 'presenter-2', 'ext': 'mp4', 'vcodec': 'avc1.4d4028', 'acodec': 'none', 'protocol': 'http_dash_segments', 'fps': 25, }, { 'format_id': 'merged', 'ext': 'mp4', 'protocol': 'https', 'format_note': 'single stream, may be lower res', }, ], }, 'skip': 'This video is private and requires authentication to access', }, { 'note': 'test for deprecated streaming url, should rely on fallback', 'url': 'https://educast.fccn.pt/vod/clips/2by2fw4fkx/streaming.html', 'md5': '88055700118db7411d1cc0da48ca1747', 'info_dict': { 'id': '2by2fw4fkx', 'ext': 'mp4', 'title': 'Teoria 3A. Sistemas de Equaces Diferenciais Lineares de Primeira Ordem_', }, 'skip': 'This video is private and requires authentication to access', }, ] def parse_timestamp(self, timestamp_str): if isinstance(timestamp_str, str) and '.' in timestamp_str: day, month, year_time = timestamp_str.split('.', 2) year, time = year_time.split(' ', 1) reformatted = f'{year}-{month}-{day} {time}' timestamp = unified_timestamp(reformatted) if timestamp is not None: timestamp -= 3600 # Lisbon time (UTC+1) return timestamp def _extract_video_formats(self, video_json, video_id): formats = [] dash_url = traverse_obj(video_json, ('dash', 'url')) if dash_url: formats += self._extract_mpd_formats(dash_url, video_id, mpd_id='dash', fatal=False) hls_url = traverse_obj(video_json, ('hls', 'url')) if hls_url: formats += self._extract_m3u8_formats(hls_url, video_id, ext='mp4', entry_protocol='m3u8_native', fatal=False) for f in formats: f['format_id'] = str_or_none(video_json.get('role')) f['width'] = int_or_none(video_json.get('width')) f['height'] = int_or_none(video_json.get('height')) f['duration'] = float_or_none(video_json.get('duration')) f['filesize_approx'] = int_or_none(float_or_none(f.get('duration')) * float_or_none(f.get('tbr')) * 1000 / 8) return formats def _extract_from_json(self, video_id): data_json_url = f'https://educast.fccn.pt/vod/clips/{video_id}/video_player/data.json' data_json = self._download_json(data_json_url, video_id, fatal=False) if not data_json: return None if data_json.get('error'): self.to_screen(data_json.get('error')) return None formats = [] info = { 'id': video_id, 'title': str_or_none(traverse_obj(data_json, ('clip', 'name'))), 'formats': formats, 'alt_title': str_or_none(data_json.get('subtitle')), 'description': str_or_none(data_json.get('clipDescription')), 'uploader': str_or_none(data_json.get('author')), 'timestamp': self.parse_timestamp(data_json.get('timestamp')), 'thumbnail': str_or_none(data_json.get('cover')), 'license': str_or_none(data_json.get('licenceURL')), 'webpage_url': str_or_none(data_json.get('url')), 'channel': str_or_none(traverse_obj(data_json, ('channel', 'name'))), 'channel_url': str_or_none(traverse_obj(data_json, ('channel', 'url'))), 'categories': [cat for cat in ( str_or_none(traverse_obj(data_json, ('area', 'name'))), str_or_none(traverse_obj(data_json, ('institution', 'name'))), ) if cat], } for video_json in data_json.get('videos') or []: formats.extend(self._extract_video_formats(video_json, video_id)) download_url = str_or_none(data_json.get('downloadURL')) if download_url: formats.append({ 'format_id': 'merged', 'url': download_url, 'quality': 0, 'format_note': 'single stream, may be lower res', }) return info def _try_fallback(self, url, video_id): import re # Last resort for videos with no working streaming option KNOWN_BASENAMES = ['desktop.mp4', 'ipod.m4v', 'quicktime.mov'] for basename in KNOWN_BASENAMES: format_url = url.replace('streaming.html', basename) response = self._request_webpage( HEADRequest(format_url), video_id, note=f'Checking availability of {basename} fallback', fatal=False, errnote=False) if not response: continue ext = mimetype2ext(response.get_header('content-type')) if ext not in ('mp4', 'm4v', 'mov'): continue title = None m = re.search(r'filename\s*=\s*"([^"]+)"', response.get_header('content-disposition'), re.IGNORECASE) if m: title = m.group(1).strip().removesuffix(f'.{ext}') return { 'id': video_id, 'title': title, 'url': format_url, } def _real_extract(self, url): video_id = self._match_id(url) return self._extract_from_json(video_id) or self._try_fallback(url, video_id) @staticmethod def _paginate_and_collect(get_page_func, parse_func, max_videos=None): videos = [] page = 1 while True: if max_videos is not None and len(videos) >= max_videos: break webpage = get_page_func(page) if not webpage: break new_videos = parse_func(webpage) found = False for v in new_videos: if not any(existing['id'] == v['id'] for existing in videos): videos.append(v) found = True if max_videos is not None and len(videos) >= max_videos: break if not found or (max_videos is not None and len(videos) >= max_videos): break page += 1 return videos class EducastChannelIE(InfoExtractor): IE_NAME = 'educast:channel' _VALID_URL = r'https?://(?:www\.)?educast\.fccn\.pt/vod/channels/(?P[a-zA-Z0-9]+)/?(?:$|[?#])' _TESTS = [ { 'note': 'test for private Educast Channel', 'url': 'https://educast.fccn.pt/vod/channels/2o0eonmrak', 'info_dict': { 'id': '2o0eonmrak', 'title': 'Vídeos Institucionais FCT-FCCN', 'description': str, }, 'playlist_mincount': 26, }, { 'note': 'test for private Educast Channel', 'url': 'https://educast.fccn.pt/vod/channels/2fudccnyj7', 'info_dict': { 'id': '2fudccnyj7', 'title': 'Cálculo Diferencial e Integral III - Aulas de Recuperação', 'description': str, }, 'playlist_mincount': 26, 'skip': 'This channel is private and requires authentication to access', }, ] def _extract_video_links_from_html(self, webpage, ie_key): import re videos_by_id = {} pattern = r'href="https://educast\.fccn\.pt/vod/clips/(?P[a-zA-Z0-9]+)/(?P