From 6486009b1e0174314056060cd1468dd91c2a9841 Mon Sep 17 00:00:00 2001 From: Fridolin Kutterer Date: Sat, 11 Nov 2023 19:42:58 +0100 Subject: [PATCH] Cleaned up Class structure, added extractors to index --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/lecturio.py | 126 +++++++++++++++----------------- 2 files changed, 60 insertions(+), 67 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 06340fcd8..933632daf 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -948,6 +948,7 @@ from .lecture2go import Lecture2GoIE from .lecturio import ( LecturioIE, + LecturioDeIE, LecturioCourseIE, LecturioDeCourseIE, ) diff --git a/yt_dlp/extractor/lecturio.py b/yt_dlp/extractor/lecturio.py index e9b7d8b9a..46bfaa3c8 100644 --- a/yt_dlp/extractor/lecturio.py +++ b/yt_dlp/extractor/lecturio.py @@ -14,64 +14,9 @@ ) -class LecturioBaseIE(InfoExtractor): - _API_BASE_URL = 'https://app.lecturio.com/api/en/latest/html5/' - _DE_API_BASE_URL = 'https://lecturio.de/api/de/latest/html5/' - _LOGIN_URL = 'https://app.lecturio.com/en/login' - _DE_LOGIN_URL = 'https://www.lecturio.de/anmelden.html' - _NETRC_MACHINE = 'lecturio' +class LecturioIE(InfoExtractor): + _VALID_URL = r'https://app\.lecturio\.com/([^/]+/(?P[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P\d+))' - is_DE = None - - # Find out if url is german before starting anything else - def extract(self, url): - self.is_DE = True if re.match(r"https://(?:www\.)?lecturio\.de/", url) else False - return super().extract(url) - - def _perform_login(self, username, password): - - login_url = self._DE_LOGIN_URL if self.is_DE else self._LOGIN_URL - # Sets some cookies - _, urlh = self._download_webpage_handle( - login_url, None, 'Downloading login popup') - - def is_logged(url_handle): - return login_url not in url_handle.geturl() - - # Already logged in - if is_logged(urlh): - return - - login_form = { - 'signin[email]': username, - 'signin[password]': password, - 'signin[remember]': 'on', - } - - response, urlh = self._download_webpage_handle( - login_url, None, 'Logging in', - data=urlencode_postdata(login_form)) - - # Logged in successfully - if is_logged(urlh): - return - - errors = self._html_search_regex( - r'(?s)]+class=["\']error_list[^>]+>(.+?)', response, - 'errors', default=None) - if errors: - raise ExtractorError('Unable to login: %s' % errors, expected=True) - raise ExtractorError('Unable to log in') - - -class LecturioIE(LecturioBaseIE): - _VALID_URL = r'''(?x) - https:// - (?: - app\.lecturio\.com/([^/]+/(?P[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P\d+))| - (?:www\.)?lecturio\.de/[^/]+/(?P[^/?#&]+)\.vortrag - ) - ''' _TESTS = [{ 'url': 'https://app.lecturio.com/medical-courses/important-concepts-and-terms-introduction-to-microbiology.lecture#tab/videos', 'md5': '9a42cf1d8282a6311bf7211bbde26fde', @@ -81,14 +26,16 @@ class LecturioIE(LecturioBaseIE): 'title': 'Important Concepts and Terms — Introduction to Microbiology', }, 'skip': 'Requires lecturio account credentials', - }, { - 'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag', - 'only_matching': True, }, { 'url': 'https://app.lecturio.com/#/lecture/c/6434/39634', 'only_matching': True, }] + _API_BASE_URL = 'https://app.lecturio.com/api/en/latest/html5/' + _LOGIN_URL = 'https://app.lecturio.com/en/login' + + _NETRC_MACHINE = 'lecturio' + _CC_LANGS = { 'Arabic': 'ar', 'Bulgarian': 'bg', @@ -103,15 +50,48 @@ class LecturioIE(LecturioBaseIE): 'Russian': 'ru', } + def _perform_login(self, username, password): + + # Sets some cookies + _, urlh = self._download_webpage_handle( + self._LOGIN_URL, None, 'Downloading login popup') + + def is_logged(url_handle): + return self._LOGIN_URL not in url_handle.geturl() + + # Already logged in + if is_logged(urlh): + return + + login_form = { + 'signin[email]': username, + 'signin[password]': password, + 'signin[remember]': 'on', + } + + response, urlh = self._download_webpage_handle( + self._LOGIN_URL, None, 'Logging in', + data=urlencode_postdata(login_form)) + + # Logged in successfully + if is_logged(urlh): + return + + errors = self._html_search_regex( + r'(?s)]+class=["\']error_list[^>]+>(.+?)', response, + 'errors', default=None) + if errors: + raise ExtractorError('Unable to login: %s' % errors, expected=True) + raise ExtractorError('Unable to log in') + def _real_extract(self, url): mobj = self._match_valid_url(url) - nt = mobj.group('nt') or mobj.group('nt_de') + nt = mobj.group('nt') lecture_id = mobj.group('id') display_id = nt or lecture_id api_path = 'lectures/' + lecture_id if lecture_id else 'lecture/' + nt + '.json' - video = self._download_json( - (self._DE_API_BASE_URL if self.is_DE else self._API_BASE_URL) + api_path, display_id) + video = self._download_json(self._API_BASE_URL + api_path, display_id) title = video['title'].strip() if not lecture_id: pid = video.get('productId') or video.get('uid') @@ -179,8 +159,20 @@ def _real_extract(self, url): 'automatic_captions': automatic_captions, } +# German Lecturio simply requires different URLs +class LecturioDeIE(LecturioIE): + _VALID_URL = r'https://www\.lecturio\.de/[^/]+/(?P[^/?#&]+)\.vortrag' -class LecturioCourseIE(LecturioBaseIE): + _TESTS = [{ + 'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag', + 'only_matching': True, + }] + + _API_BASE_URL = 'https://lecturio.de/api/de/latest/html5/' + _LOGIN_URL = 'https://www.lecturio.de/anmelden.html' + + +class LecturioCourseIE(LecturioIE): _VALID_URL = r'https://app\.lecturio\.com/(?:[^/]+/(?P[^/?#&]+)\.course|(?:#/)?course/c/(?P\d+))' _TESTS = [{ 'url': 'https://app.lecturio.com/medical-courses/microbiology-introduction.course#/', @@ -217,12 +209,12 @@ def _real_extract(self, url): clean_html(course.get('description'))) -class LecturioDeCourseIE(LecturioBaseIE): +class LecturioDeCourseIE(LecturioDeIE): _VALID_URL = r'https://(?:www\.)?lecturio\.de/[^/]+/(?P[^/?#&]+)\.kurs' - _TEST = { + _TESTS = [{ 'url': 'https://www.lecturio.de/jura/grundrechte.kurs', 'only_matching': True, - } + }] def _real_extract(self, url): display_id = self._match_id(url)