mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-30 22:25:19 +00:00 
			
		
		
		
	[teachable] Add support for Teachable-based platform sites (closes #5451, closes #18150, closes #18272)
This commit is contained in:
		| @@ -1091,6 +1091,10 @@ from .tass import TassIE | |||||||
| from .tastytrade import TastyTradeIE | from .tastytrade import TastyTradeIE | ||||||
| from .tbs import TBSIE | from .tbs import TBSIE | ||||||
| from .tdslifeway import TDSLifewayIE | from .tdslifeway import TDSLifewayIE | ||||||
|  | from .teachable import ( | ||||||
|  |     TeachableIE, | ||||||
|  |     TeachableCourseIE, | ||||||
|  | ) | ||||||
| from .teachertube import ( | from .teachertube import ( | ||||||
|     TeacherTubeIE, |     TeacherTubeIE, | ||||||
|     TeacherTubeUserIE, |     TeacherTubeUserIE, | ||||||
| @@ -1240,10 +1244,6 @@ from .uplynk import ( | |||||||
|     UplynkIE, |     UplynkIE, | ||||||
|     UplynkPreplayIE, |     UplynkPreplayIE, | ||||||
| ) | ) | ||||||
| from .upskill import ( |  | ||||||
|     UpskillIE, |  | ||||||
|     UpskillCourseIE, |  | ||||||
| ) |  | ||||||
| from .urort import UrortIE | from .urort import UrortIE | ||||||
| from .urplay import URPlayIE | from .urplay import URPlayIE | ||||||
| from .usanetwork import USANetworkIE | from .usanetwork import USANetworkIE | ||||||
|   | |||||||
| @@ -109,6 +109,7 @@ from .vice import ViceIE | |||||||
| from .xfileshare import XFileShareIE | from .xfileshare import XFileShareIE | ||||||
| from .cloudflarestream import CloudflareStreamIE | from .cloudflarestream import CloudflareStreamIE | ||||||
| from .peertube import PeerTubeIE | from .peertube import PeerTubeIE | ||||||
|  | from .teachable import TeachableIE | ||||||
| from .indavideo import IndavideoEmbedIE | from .indavideo import IndavideoEmbedIE | ||||||
| from .apa import APAIE | from .apa import APAIE | ||||||
| from .foxnews import FoxNewsIE | from .foxnews import FoxNewsIE | ||||||
| @@ -3112,6 +3113,10 @@ class GenericIE(InfoExtractor): | |||||||
|             return self.playlist_from_matches( |             return self.playlist_from_matches( | ||||||
|                 peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key()) |                 peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key()) | ||||||
|  |  | ||||||
|  |         teachable_url = TeachableIE._extract_url(webpage, url) | ||||||
|  |         if teachable_url: | ||||||
|  |             return self.url_result(teachable_url) | ||||||
|  |  | ||||||
|         indavideo_urls = IndavideoEmbedIE._extract_urls(webpage) |         indavideo_urls = IndavideoEmbedIE._extract_urls(webpage) | ||||||
|         if indavideo_urls: |         if indavideo_urls: | ||||||
|             return self.playlist_from_matches( |             return self.playlist_from_matches( | ||||||
|   | |||||||
| @@ -14,20 +14,38 @@ from ..utils import ( | |||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class UpskillBaseIE(InfoExtractor): | class TeachableBaseIE(InfoExtractor): | ||||||
|     _LOGIN_URL = 'http://upskillcourses.com/sign_in' |     _NETRC_MACHINE = 'teachable' | ||||||
|     _NETRC_MACHINE = 'upskill' |     _URL_PREFIX = 'teachable:' | ||||||
|  | 
 | ||||||
|  |     _SITES = { | ||||||
|  |         # Only notable ones here | ||||||
|  |         'upskillcourses.com': 'upskill', | ||||||
|  |         'academy.gns3.com': 'gns3', | ||||||
|  |         'academyhacker.com': 'academyhacker', | ||||||
|  |         'stackskills.com': 'stackskills', | ||||||
|  |         'market.saleshacker.com': 'saleshacker', | ||||||
|  |         'learnability.org': 'learnability', | ||||||
|  |         'edurila.com': 'edurila', | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     _VALID_URL_SUB_TUPLE = (_URL_PREFIX, '|'.join(re.escape(site) for site in _SITES.keys())) | ||||||
| 
 | 
 | ||||||
|     def _real_initialize(self): |     def _real_initialize(self): | ||||||
|         self._login() |         self._logged_in = False | ||||||
| 
 | 
 | ||||||
|     def _login(self): |     def _login(self, site): | ||||||
|         username, password = self._get_login_info() |         if self._logged_in: | ||||||
|  |             return | ||||||
|  | 
 | ||||||
|  |         username, password = self._get_login_info( | ||||||
|  |             netrc_machine=self._SITES.get(site, site)) | ||||||
|         if username is None: |         if username is None: | ||||||
|             return |             return | ||||||
| 
 | 
 | ||||||
|         login_page, urlh = self._download_webpage_handle( |         login_page, urlh = self._download_webpage_handle( | ||||||
|             self._LOGIN_URL, None, 'Downloading login page') |             'https://%s/sign_in' % site, None, | ||||||
|  |             'Downloading %s login page' % site) | ||||||
| 
 | 
 | ||||||
|         login_url = compat_str(urlh.geturl()) |         login_url = compat_str(urlh.geturl()) | ||||||
| 
 | 
 | ||||||
| @@ -46,18 +64,24 @@ class UpskillBaseIE(InfoExtractor): | |||||||
|             post_url = urljoin(login_url, post_url) |             post_url = urljoin(login_url, post_url) | ||||||
| 
 | 
 | ||||||
|         response = self._download_webpage( |         response = self._download_webpage( | ||||||
|             post_url, None, 'Logging in', |             post_url, None, 'Logging in to %s' % site, | ||||||
|             data=urlencode_postdata(login_form), |             data=urlencode_postdata(login_form), | ||||||
|             headers={ |             headers={ | ||||||
|                 'Content-Type': 'application/x-www-form-urlencoded', |                 'Content-Type': 'application/x-www-form-urlencoded', | ||||||
|                 'Referer': login_url, |                 'Referer': login_url, | ||||||
|             }) |             }) | ||||||
| 
 | 
 | ||||||
|  |         if '>I accept the new Privacy Policy<' in response: | ||||||
|  |             raise ExtractorError( | ||||||
|  |                 'Unable to login: %s asks you to accept new Privacy Policy. ' | ||||||
|  |                 'Go to https://%s/ and accept.' % (site, site), expected=True) | ||||||
|  | 
 | ||||||
|         # Successful login |         # Successful login | ||||||
|         if any(re.search(p, response) for p in ( |         if any(re.search(p, response) for p in ( | ||||||
|                 r'class=["\']user-signout', |                 r'class=["\']user-signout', | ||||||
|                 r'<a[^>]+\bhref=["\']/sign_out', |                 r'<a[^>]+\bhref=["\']/sign_out', | ||||||
|                 r'>\s*Log out\s*<')): |                 r'>\s*Log out\s*<')): | ||||||
|  |             self._logged_in = True | ||||||
|             return |             return | ||||||
| 
 | 
 | ||||||
|         message = get_element_by_class('alert', response) |         message = get_element_by_class('alert', response) | ||||||
| @@ -68,8 +92,14 @@ class UpskillBaseIE(InfoExtractor): | |||||||
|         raise ExtractorError('Unable to log in') |         raise ExtractorError('Unable to log in') | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class UpskillIE(UpskillBaseIE): | class TeachableIE(TeachableBaseIE): | ||||||
|     _VALID_URL = r'https?://(?:www\.)?upskillcourses\.com/courses/[^/]+/lectures/(?P<id>\d+)' |     _VALID_URL = r'''(?x) | ||||||
|  |                     (?: | ||||||
|  |                         %shttps?://(?P<site_t>[^/]+)| | ||||||
|  |                         https?://(?:www\.)?(?P<site>%s) | ||||||
|  |                     ) | ||||||
|  |                     /courses/[^/]+/lectures/(?P<id>\d+) | ||||||
|  |                     ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE | ||||||
| 
 | 
 | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'http://upskillcourses.com/courses/essential-web-developer-course/lectures/1747100', |         'url': 'http://upskillcourses.com/courses/essential-web-developer-course/lectures/1747100', | ||||||
| @@ -77,7 +107,7 @@ class UpskillIE(UpskillBaseIE): | |||||||
|             'id': 'uzw6zw58or', |             'id': 'uzw6zw58or', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': 'Welcome to the Course!', |             'title': 'Welcome to the Course!', | ||||||
|             'description': 'md5:8d66c13403783370af62ca97a7357bdd', |             'description': 'md5:65edb0affa582974de4625b9cdea1107', | ||||||
|             'duration': 138.763, |             'duration': 138.763, | ||||||
|             'timestamp': 1479846621, |             'timestamp': 1479846621, | ||||||
|             'upload_date': '20161122', |             'upload_date': '20161122', | ||||||
| @@ -88,10 +118,38 @@ class UpskillIE(UpskillBaseIE): | |||||||
|     }, { |     }, { | ||||||
|         'url': 'http://upskillcourses.com/courses/119763/lectures/1747100', |         'url': 'http://upskillcourses.com/courses/119763/lectures/1747100', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://academy.gns3.com/courses/423415/lectures/6885939', | ||||||
|  |         'only_matching': True, | ||||||
|  |     }, { | ||||||
|  |         'url': 'teachable:https://upskillcourses.com/courses/essential-web-developer-course/lectures/1747100', | ||||||
|  |         'only_matching': True, | ||||||
|     }] |     }] | ||||||
| 
 | 
 | ||||||
|  |     @staticmethod | ||||||
|  |     def _is_teachable(webpage): | ||||||
|  |         return 'teachableTracker.linker:autoLink' in webpage and re.search( | ||||||
|  |             r'<link[^>]+href=["\']https?://process\.fs\.teachablecdn\.com', | ||||||
|  |             webpage) | ||||||
|  | 
 | ||||||
|  |     @staticmethod | ||||||
|  |     def _extract_url(webpage, source_url): | ||||||
|  |         if not TeachableIE._is_teachable(webpage): | ||||||
|  |             print('NOT TEACHABLE') | ||||||
|  |             return | ||||||
|  |         if re.match(r'https?://[^/]+/(?:courses|p)', source_url): | ||||||
|  |             return '%s%s' % (TeachableBaseIE._URL_PREFIX, source_url) | ||||||
|  | 
 | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         video_id = self._match_id(url) |         mobj = re.match(self._VALID_URL, url) | ||||||
|  |         site = mobj.group('site') or mobj.group('site_t') | ||||||
|  |         video_id = mobj.group('id') | ||||||
|  | 
 | ||||||
|  |         self._login(site) | ||||||
|  | 
 | ||||||
|  |         prefixed = url.startswith(self._URL_PREFIX) | ||||||
|  |         if prefixed: | ||||||
|  |             url = url[len(self._URL_PREFIX):] | ||||||
| 
 | 
 | ||||||
|         webpage = self._download_webpage(url, video_id) |         webpage = self._download_webpage(url, video_id) | ||||||
| 
 | 
 | ||||||
| @@ -113,12 +171,18 @@ class UpskillIE(UpskillBaseIE): | |||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class UpskillCourseIE(UpskillBaseIE): | class TeachableCourseIE(TeachableBaseIE): | ||||||
|     _VALID_URL = r'https?://(?:www\.)?upskillcourses\.com/courses/(?:enrolled/)?(?P<id>[^/?#&]+)' |     _VALID_URL = r'''(?x) | ||||||
|  |                         (?: | ||||||
|  |                             %shttps?://(?P<site_t>[^/]+)| | ||||||
|  |                             https?://(?:www\.)?(?P<site>%s) | ||||||
|  |                         ) | ||||||
|  |                         /(?:courses|p)/(?:enrolled/)?(?P<id>[^/?#&]+) | ||||||
|  |                     ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'http://upskillcourses.com/courses/essential-web-developer-course/', |         'url': 'http://upskillcourses.com/courses/essential-web-developer-course/', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '119763', |             'id': 'essential-web-developer-course', | ||||||
|             'title': 'The Essential Web Developer Course (Free)', |             'title': 'The Essential Web Developer Course (Free)', | ||||||
|         }, |         }, | ||||||
|         'playlist_count': 192, |         'playlist_count': 192, | ||||||
| @@ -128,21 +192,37 @@ class UpskillCourseIE(UpskillBaseIE): | |||||||
|     }, { |     }, { | ||||||
|         'url': 'http://upskillcourses.com/courses/enrolled/119763', |         'url': 'http://upskillcourses.com/courses/enrolled/119763', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://academy.gns3.com/courses/enrolled/423415', | ||||||
|  |         'only_matching': True, | ||||||
|  |     }, { | ||||||
|  |         'url': 'teachable:https://learn.vrdev.school/p/gear-vr-developer-mini', | ||||||
|  |         'only_matching': True, | ||||||
|  |     }, { | ||||||
|  |         'url': 'teachable:https://filmsimplified.com/p/davinci-resolve-15-crash-course', | ||||||
|  |         'only_matching': True, | ||||||
|     }] |     }] | ||||||
| 
 | 
 | ||||||
|     @classmethod |     @classmethod | ||||||
|     def suitable(cls, url): |     def suitable(cls, url): | ||||||
|         return False if UpskillIE.suitable(url) else super( |         return False if TeachableIE.suitable(url) else super( | ||||||
|             UpskillCourseIE, cls).suitable(url) |             TeachableCourseIE, cls).suitable(url) | ||||||
| 
 | 
 | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         course_id = self._match_id(url) |         mobj = re.match(self._VALID_URL, url) | ||||||
|  |         site = mobj.group('site') or mobj.group('site_t') | ||||||
|  |         course_id = mobj.group('id') | ||||||
|  | 
 | ||||||
|  |         self._login(site) | ||||||
|  | 
 | ||||||
|  |         prefixed = url.startswith(self._URL_PREFIX) | ||||||
|  |         if prefixed: | ||||||
|  |             prefix = self._URL_PREFIX | ||||||
|  |             url = url[len(prefix):] | ||||||
| 
 | 
 | ||||||
|         webpage = self._download_webpage(url, course_id) |         webpage = self._download_webpage(url, course_id) | ||||||
| 
 | 
 | ||||||
|         course_id = self._search_regex( |         url_base = 'https://%s/' % site | ||||||
|             r'data-course-id=["\'](\d+)', webpage, 'course id', |  | ||||||
|             default=course_id) |  | ||||||
| 
 | 
 | ||||||
|         entries = [] |         entries = [] | ||||||
| 
 | 
 | ||||||
| @@ -162,10 +242,13 @@ class UpskillCourseIE(UpskillBaseIE): | |||||||
|             title = self._html_search_regex( |             title = self._html_search_regex( | ||||||
|                 r'<span[^>]+class=["\']lecture-name[^>]+>([^<]+)', li, |                 r'<span[^>]+class=["\']lecture-name[^>]+>([^<]+)', li, | ||||||
|                 'title', default=None) |                 'title', default=None) | ||||||
|  |             entry_url = urljoin(url_base, lecture_url) | ||||||
|  |             if prefixed: | ||||||
|  |                 entry_url = self._URL_PREFIX + entry_url | ||||||
|             entries.append( |             entries.append( | ||||||
|                 self.url_result( |                 self.url_result( | ||||||
|                     urljoin('http://upskillcourses.com/', lecture_url), |                     entry_url, | ||||||
|                     ie=UpskillIE.ie_key(), video_id=lecture_id, |                     ie=TeachableIE.ie_key(), video_id=lecture_id, | ||||||
|                     video_title=clean_html(title))) |                     video_title=clean_html(title))) | ||||||
| 
 | 
 | ||||||
|         course_title = self._html_search_regex( |         course_title = self._html_search_regex( | ||||||
		Reference in New Issue
	
	Block a user
	 Sergey M․
					Sergey M․