From ff740b365e83f0967c1dff807dde434baf1967a2 Mon Sep 17 00:00:00 2001 From: Fridolin Kutterer Date: Thu, 29 Jun 2023 11:16:35 +0200 Subject: [PATCH 1/6] Made Lecturio poll the german API if a german URL is given Formatting fixes --- yt_dlp/extractor/lecturio.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/lecturio.py b/yt_dlp/extractor/lecturio.py index 973764c63..e9b7d8b9a 100644 --- a/yt_dlp/extractor/lecturio.py +++ b/yt_dlp/extractor/lecturio.py @@ -16,16 +16,27 @@ class LecturioBaseIE(InfoExtractor): _API_BASE_URL = 'https://app.lecturio.com/api/en/latest/html5/' + _DE_API_BASE_URL = 'https://lecturio.de/api/de/latest/html5/' _LOGIN_URL = 'https://app.lecturio.com/en/login' + _DE_LOGIN_URL = 'https://www.lecturio.de/anmelden.html' _NETRC_MACHINE = 'lecturio' + is_DE = None + + # Find out if url is german before starting anything else + def extract(self, url): + self.is_DE = True if re.match(r"https://(?:www\.)?lecturio\.de/", url) else False + return super().extract(url) + def _perform_login(self, username, password): + + login_url = self._DE_LOGIN_URL if self.is_DE else self._LOGIN_URL # Sets some cookies _, urlh = self._download_webpage_handle( - self._LOGIN_URL, None, 'Downloading login popup') + login_url, None, 'Downloading login popup') def is_logged(url_handle): - return self._LOGIN_URL not in url_handle.geturl() + return login_url not in url_handle.geturl() # Already logged in if is_logged(urlh): @@ -38,7 +49,7 @@ def is_logged(url_handle): } response, urlh = self._download_webpage_handle( - self._LOGIN_URL, None, 'Logging in', + login_url, None, 'Logging in', data=urlencode_postdata(login_form)) # Logged in successfully @@ -98,8 +109,9 @@ def _real_extract(self, url): lecture_id = mobj.group('id') display_id = nt or lecture_id api_path = 'lectures/' + lecture_id if lecture_id else 'lecture/' + nt + '.json' + video = self._download_json( - self._API_BASE_URL + api_path, display_id) + 
(self._DE_API_BASE_URL if self.is_DE else self._API_BASE_URL) + api_path, display_id) title = video['title'].strip() if not lecture_id: pid = video.get('productId') or video.get('uid') From 6486009b1e0174314056060cd1468dd91c2a9841 Mon Sep 17 00:00:00 2001 From: Fridolin Kutterer Date: Sat, 11 Nov 2023 19:42:58 +0100 Subject: [PATCH 2/6] Cleaned up Class structure, added extractors to index --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/lecturio.py | 126 +++++++++++++++----------------- 2 files changed, 60 insertions(+), 67 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 06340fcd8..933632daf 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -948,6 +948,7 @@ from .lecture2go import Lecture2GoIE from .lecturio import ( LecturioIE, + LecturioDeIE, LecturioCourseIE, LecturioDeCourseIE, ) diff --git a/yt_dlp/extractor/lecturio.py b/yt_dlp/extractor/lecturio.py index e9b7d8b9a..46bfaa3c8 100644 --- a/yt_dlp/extractor/lecturio.py +++ b/yt_dlp/extractor/lecturio.py @@ -14,64 +14,9 @@ ) -class LecturioBaseIE(InfoExtractor): - _API_BASE_URL = 'https://app.lecturio.com/api/en/latest/html5/' - _DE_API_BASE_URL = 'https://lecturio.de/api/de/latest/html5/' - _LOGIN_URL = 'https://app.lecturio.com/en/login' - _DE_LOGIN_URL = 'https://www.lecturio.de/anmelden.html' - _NETRC_MACHINE = 'lecturio' +class LecturioIE(InfoExtractor): + _VALID_URL = r'https://app\.lecturio\.com/([^/]+/(?P<nt>[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P<id>\d+))' - is_DE = None - - # Find out if url is german before starting anything else - def extract(self, url): - self.is_DE = True if re.match(r"https://(?:www\.)?lecturio\.de/", url) else False - return super().extract(url) - - def _perform_login(self, username, password): - - login_url = self._DE_LOGIN_URL if self.is_DE else self._LOGIN_URL - # Sets some cookies - _, urlh = self._download_webpage_handle( - login_url, None, 'Downloading login popup') - - def 
is_logged(url_handle): - return login_url not in url_handle.geturl() - - # Already logged in - if is_logged(urlh): - return - - login_form = { - 'signin[email]': username, - 'signin[password]': password, - 'signin[remember]': 'on', - } - - response, urlh = self._download_webpage_handle( - login_url, None, 'Logging in', - data=urlencode_postdata(login_form)) - - # Logged in successfully - if is_logged(urlh): - return - - errors = self._html_search_regex( - r'(?s)]+class=["\']error_list[^>]+>(.+?)', response, - 'errors', default=None) - if errors: - raise ExtractorError('Unable to login: %s' % errors, expected=True) - raise ExtractorError('Unable to log in') - - -class LecturioIE(LecturioBaseIE): - _VALID_URL = r'''(?x) - https:// - (?: - app\.lecturio\.com/([^/]+/(?P<nt>[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P<id>\d+))| - (?:www\.)?lecturio\.de/[^/]+/(?P<nt_de>[^/?#&]+)\.vortrag - ) - ''' _TESTS = [{ 'url': 'https://app.lecturio.com/medical-courses/important-concepts-and-terms-introduction-to-microbiology.lecture#tab/videos', 'md5': '9a42cf1d8282a6311bf7211bbde26fde', @@ -81,14 +26,16 @@ class LecturioIE(LecturioBaseIE): 'title': 'Important Concepts and Terms — Introduction to Microbiology', }, 'skip': 'Requires lecturio account credentials', - }, { - 'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag', - 'only_matching': True, }, { 'url': 'https://app.lecturio.com/#/lecture/c/6434/39634', 'only_matching': True, }] + _API_BASE_URL = 'https://app.lecturio.com/api/en/latest/html5/' + _LOGIN_URL = 'https://app.lecturio.com/en/login' + + _NETRC_MACHINE = 'lecturio' + _CC_LANGS = { 'Arabic': 'ar', 'Bulgarian': 'bg', @@ -103,15 +50,48 @@ class LecturioIE(LecturioBaseIE): 'Russian': 'ru', } + def _perform_login(self, username, password): + + # Sets some cookies + _, urlh = self._download_webpage_handle( + self._LOGIN_URL, None, 'Downloading login popup') + + def is_logged(url_handle): + return self._LOGIN_URL not in url_handle.geturl() + + # Already logged 
in + if is_logged(urlh): + return + + login_form = { + 'signin[email]': username, + 'signin[password]': password, + 'signin[remember]': 'on', + } + + response, urlh = self._download_webpage_handle( + self._LOGIN_URL, None, 'Logging in', + data=urlencode_postdata(login_form)) + + # Logged in successfully + if is_logged(urlh): + return + + errors = self._html_search_regex( + r'(?s)]+class=["\']error_list[^>]+>(.+?)', response, + 'errors', default=None) + if errors: + raise ExtractorError('Unable to login: %s' % errors, expected=True) + raise ExtractorError('Unable to log in') + def _real_extract(self, url): mobj = self._match_valid_url(url) - nt = mobj.group('nt') or mobj.group('nt_de') + nt = mobj.group('nt') lecture_id = mobj.group('id') display_id = nt or lecture_id api_path = 'lectures/' + lecture_id if lecture_id else 'lecture/' + nt + '.json' - video = self._download_json( - (self._DE_API_BASE_URL if self.is_DE else self._API_BASE_URL) + api_path, display_id) + video = self._download_json(self._API_BASE_URL + api_path, display_id) title = video['title'].strip() if not lecture_id: pid = video.get('productId') or video.get('uid') @@ -179,8 +159,20 @@ def _real_extract(self, url): 'automatic_captions': automatic_captions, } +# German Lecturio simply requires different URLs +class LecturioDeIE(LecturioIE): + _VALID_URL = r'https://www\.lecturio\.de/[^/]+/(?P<nt>[^/?#&]+)\.vortrag' -class LecturioCourseIE(LecturioBaseIE): + _TESTS = [{ + 'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag', + 'only_matching': True, + }] + + _API_BASE_URL = 'https://lecturio.de/api/de/latest/html5/' + _LOGIN_URL = 'https://www.lecturio.de/anmelden.html' + + +class LecturioCourseIE(LecturioIE): _VALID_URL = r'https://app\.lecturio\.com/(?:[^/]+/(?P<nt>[^/?#&]+)\.course|(?:#/)?course/c/(?P<id>\d+))' _TESTS = [{ 'url': 'https://app.lecturio.com/medical-courses/microbiology-introduction.course#/', @@ -217,12 +209,12 @@ def _real_extract(self, url): 
clean_html(course.get('description'))) -class LecturioDeCourseIE(LecturioBaseIE): +class LecturioDeCourseIE(LecturioDeIE): _VALID_URL = r'https://(?:www\.)?lecturio\.de/[^/]+/(?P<id>[^/?#&]+)\.kurs' - _TEST = { + _TESTS = [{ 'url': 'https://www.lecturio.de/jura/grundrechte.kurs', 'only_matching': True, - } + }] def _real_extract(self, url): display_id = self._match_id(url) From 8dcf87c769c4ea18b9318c425351f289fe956227 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 31 May 2024 04:12:46 +0000 Subject: [PATCH 3/6] Fixes --- yt_dlp/extractor/lecturio.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/yt_dlp/extractor/lecturio.py b/yt_dlp/extractor/lecturio.py index 08f45cca6..9ee864731 100644 --- a/yt_dlp/extractor/lecturio.py +++ b/yt_dlp/extractor/lecturio.py @@ -17,9 +17,7 @@ class LecturioBaseIE(InfoExtractor): _API_BASE_URL = 'https://app.lecturio.com/api/en/latest/html5/' _LOGIN_URL = 'https://app.lecturio.com/en/login' - _NETRC_MACHINE = 'lecturio' - _CC_LANGS = { 'Arabic': 'ar', 'Bulgarian': 'bg', @@ -146,7 +144,6 @@ def _real_extract(self, url): class LecturioIE(LecturioBaseIE): _VALID_URL = r'https?://app\.lecturio\.com/([^/?#]+/(?P<nt>[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P<id>\d+))' - _TESTS = [{ 'url': 'https://app.lecturio.com/medical-courses/important-concepts-and-terms-introduction-to-microbiology.lecture#tab/videos', 'md5': '9a42cf1d8282a6311bf7211bbde26fde', @@ -161,15 +158,12 @@ class LecturioIE(LecturioBaseIE): 'only_matching': True, }] - -class LecturioDeIE(LecturioIE): - _VALID_URL = r'https?://www\.lecturio\.de/[^/?#]+/(?P<nt>[^/?#&]+)\.vortrag' - +class LecturioDeIE(LecturioBaseIE): + _VALID_URL = r'https?://www\.lecturio\.de/[^/?#]+/(?P<id>)(?P<nt>[^/?#&]+)\.vortrag' _TESTS = [{ 'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag', 'only_matching': True, }] - _API_BASE_URL = 'https://lecturio.de/api/de/latest/html5/' _LOGIN_URL = 
'https://www.lecturio.de/anmelden.html' @@ -220,7 +214,6 @@ class LecturioDeCourseIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) entries = [] @@ -230,7 +223,7 @@ def _real_extract(self, url): lecture_url = urljoin(url, mobj.group('url')) lecture_id = mobj.group('id') entries.append(self.url_result( - lecture_url, ie=LecturioIE.ie_key(), video_id=lecture_id)) + lecture_url, LecturioDeIE, video_id=lecture_id)) title = self._search_regex( r']*>([^<]+)', webpage, 'title', default=None) From 9d8933d4d7813e9d4947c1e22f71203746936018 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 31 May 2024 04:13:13 +0000 Subject: [PATCH 4/6] cleanup --- yt_dlp/extractor/lecturio.py | 1 - 1 file changed, 1 deletion(-) diff --git a/yt_dlp/extractor/lecturio.py b/yt_dlp/extractor/lecturio.py index 9ee864731..492764199 100644 --- a/yt_dlp/extractor/lecturio.py +++ b/yt_dlp/extractor/lecturio.py @@ -33,7 +33,6 @@ class LecturioBaseIE(InfoExtractor): } def _perform_login(self, username, password): - # Sets some cookies _, urlh = self._download_webpage_handle( self._LOGIN_URL, None, 'Downloading login popup') From 111dd1b4a896d4f8e811dccce2ee0715adc30654 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 31 May 2024 04:14:17 +0000 Subject: [PATCH 5/6] tests formatting --- yt_dlp/extractor/lecturio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/lecturio.py b/yt_dlp/extractor/lecturio.py index 492764199..5e59493c6 100644 --- a/yt_dlp/extractor/lecturio.py +++ b/yt_dlp/extractor/lecturio.py @@ -206,7 +206,7 @@ def _real_extract(self, url): class LecturioDeCourseIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?lecturio\.de/[^/?#]+/(?P<id>[^/?#&]+)\.kurs' - _TEST = { + _TESTS = [{ 'url': 'https://www.lecturio.de/jura/grundrechte.kurs', 'only_matching': True, }] From 
0b6d13c28978b38bca44a04fa3df015fbac969d9 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 31 May 2024 04:15:43 +0000 Subject: [PATCH 6/6] formatting --- yt_dlp/extractor/lecturio.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yt_dlp/extractor/lecturio.py b/yt_dlp/extractor/lecturio.py index 5e59493c6..e88a21d51 100644 --- a/yt_dlp/extractor/lecturio.py +++ b/yt_dlp/extractor/lecturio.py @@ -157,6 +157,7 @@ class LecturioIE(LecturioBaseIE): 'only_matching': True, }] + class LecturioDeIE(LecturioBaseIE): _VALID_URL = r'https?://www\.lecturio\.de/[^/?#]+/(?P<id>)(?P<nt>[^/?#&]+)\.vortrag' _TESTS = [{ @@ -213,6 +214,7 @@ class LecturioDeCourseIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) entries = []