From 156c80bf0a22ba1241c8d46ae9c0e98654004439 Mon Sep 17 00:00:00 2001
From: flanter21
Date: Thu, 13 Feb 2025 18:43:21 +0000
Subject: [PATCH] [ie/BlackboardCollaborate] Add support for subtitles, live chat, filesize and videos behind a login wall

---
Review notes:
- `base64` and `json` are imported below but nothing in this patch references them.
- The blob id on the `index` line was not recomputed after the review fixes.

 yt_dlp/extractor/blackboardcollaborate.py | 75 +++++++++++++++++++----
 1 file changed, 64 insertions(+), 11 deletions(-)

diff --git a/yt_dlp/extractor/blackboardcollaborate.py b/yt_dlp/extractor/blackboardcollaborate.py
index 535890979..a8832b288 100644
--- a/yt_dlp/extractor/blackboardcollaborate.py
+++ b/yt_dlp/extractor/blackboardcollaborate.py
@@ -1,16 +1,29 @@
+import base64
+import json
+
 from .common import InfoExtractor
-from ..utils import parse_iso8601
+from ..utils import (
+    float_or_none,
+    mimetype2ext,
+    parse_iso8601,
+)
+from ..utils.traversal import traverse_obj
+
+# API references:
+#   - Blackboard Learn: https://developer.blackboard.com/portal/displayApi
+#   - Blackboard Collaborate: https://github.com/blackboard/BBDN-Collab-Postman-REST
 
 
 class BlackboardCollaborateIE(InfoExtractor):
     _VALID_URL = r'''(?x)
                         https?://
-                        (?P<region>[a-z-]+)\.bbcollab\.com/
+                        (?P<region>[a-z]+)(?:-lti)?\.bbcollab\.com/
                         (?:
                             collab/ui/session/playback/load|
                             recording
                         )/
-                        (?P<id>[^/]+)'''
+                        (?P<id>[^/\?]+)
+                        \??(authToken=(?P<token>[\w\.\-]+))?'''
     _TESTS = [
         {
             'url': 'https://us-lti.bbcollab.com/collab/ui/session/playback/load/0a633b6a88824deb8c918f470b22b256',
@@ -43,21 +56,61 @@ class BlackboardCollaborateIE(InfoExtractor):
     ]
 
     def _real_extract(self, url):
+        # Prepare for requests
         mobj = self._match_valid_url(url)
         region = mobj.group('region')
         video_id = mobj.group('id')
-        info = self._download_json(
-            f'https://{region}.bbcollab.com/collab/api/csa/recordings/{video_id}/data', video_id)
-        duration = info.get('duration')
-        title = info['name']
-        upload_date = info.get('created')
-        streams = info['streams']
-        formats = [{'format_id': k, 'url': url} for k, url in streams.items()]
+        token = mobj.group('token')
+
+        # Only send the header when the URL carried a token; otherwise it would read "Bearer None"
+        headers = {'Authorization': f'Bearer {token}'} if token else {}
+        base_url = f'https://{region}.bbcollab.com/collab/api/csa/recordings/{video_id}'
+
+        # Try request the way the player handles it when behind a login
+        if video_info := self._download_json(f'{base_url}/data/secure', video_id, 'Trying auth token',
+                                             headers=headers, fatal=False):
+            # This request is non-fatal as well, so fall back to an empty dict when it yields nothing
+            video_extra = self._download_json(base_url, video_id, 'Retrieving extra attributes',
+                                              headers=headers, fatal=False) or {}
+
+        # Blackboard will allow redownloading from the same IP without authentication for a while,
+        # so if the previous method fails, try this
+        else:
+            video_info = self._download_json(f'{base_url}/data', video_id, 'Trying fallback')
+            video_extra = {}
+
+        # Get metadata
+        duration = float_or_none(video_info.get('duration'), scale=1000)  # None-safe replacement for "/ 1000"
+        title = video_info.get('name')
+        upload_date = video_info.get('created')
+
+        # Get streams: entries without a 'streamUrl' are skipped and a missing 'extStreams' gives no formats.
+        # The response may also carry 'streams' (the previous code read it as a {format_id: url} mapping)
+        stream_formats = [{
+            'url': stream['streamUrl'],
+            'container': mimetype2ext(stream.get('contentType')),
+            'filesize': video_extra.get('storageSize'),
+            'aspect_ratio': video_info.get('aspectRatio'),
+        } for stream in traverse_obj(video_info, ('extStreams', lambda _, v: v['streamUrl']))]
+
+        # Get subtitles
+        subtitles = {}
+        for sub in traverse_obj(video_info, ('subtitles', lambda _, v: v['url'])):
+            # 'und' keeps the language key a string when the entry has no 'lang'
+            subtitles.setdefault(sub.get('lang') or 'und', []).append({
+                'name': sub.get('label'),
+                'url': sub['url'],
+            })
+
+        # Get chat
+        for chat in traverse_obj(video_info, ('chats', lambda _, v: v['url'])):
+            subtitles.setdefault('live_chat', []).append({'url': chat['url']})
 
         return {
             'duration': duration,
-            'formats': formats,
+            'formats': stream_formats,
             'id': video_id,
             'timestamp': parse_iso8601(upload_date),
+            'subtitles': subtitles,
             'title': title,
         }