mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	[skip travis] adding automerge support
basically copying content of youtube_dl folder to youtube_dlc and excluding the youtube_dl folder when compiling
This commit is contained in:
		
							
								
								
									
										173
									
								
								youtube_dl/extractor/linuxacademy.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										173
									
								
								youtube_dl/extractor/linuxacademy.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,173 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import random | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_b64decode, | ||||
|     compat_HTTPError, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     orderedSet, | ||||
|     unescapeHTML, | ||||
|     urlencode_postdata, | ||||
|     urljoin, | ||||
| ) | ||||
|  | ||||
|  | ||||
class LinuxAcademyIE(InfoExtractor):
    """Extractor for linuxacademy.com single lessons and whole courses.

    A lesson URL yields one jwplayer-backed video whose id is
    ``<chapter_id>-<lesson_id>``; a ``modules/view/id/<course_id>`` URL
    yields a playlist with one entry per lesson link found on the page.
    """

    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?linuxacademy\.com/cp/
                        (?:
                            courses/lesson/course/(?P<chapter_id>\d+)/lesson/(?P<lesson_id>\d+)|
                            modules/view/id/(?P<course_id>\d+)
                        )
                    '''
    _TESTS = [{
        'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2/module/154',
        'info_dict': {
            'id': '1498-2',
            'ext': 'mp4',
            'title': "Introduction to the Practitioner's Brief",
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Requires Linux Academy account credentials',
    }, {
        'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2',
        'only_matching': True,
    }, {
        'url': 'https://linuxacademy.com/cp/modules/view/id/154',
        'info_dict': {
            'id': '154',
            'title': 'AWS Certified Cloud Practitioner',
            'description': 'md5:039db7e60e4aac9cf43630e0a75fa834',
        },
        'playlist_count': 41,
        'skip': 'Requires Linux Academy account credentials',
    }]

    _AUTHORIZE_URL = 'https://login.linuxacademy.com/authorize'
    _ORIGIN_URL = 'https://linuxacademy.com'
    _CLIENT_ID = 'KaWxNn1C2Gc7n83W9OFeXltd8Utb5vvx'
    _NETRC_MACHINE = 'linuxacademy'

    def _real_initialize(self):
        """Log in once before any extraction is attempted."""
        self._login()

    def _login(self):
        """Authenticate against login.linuxacademy.com.

        Does nothing when no username is configured.  Otherwise walks the
        authorize page -> username/password POST -> callback POST -> token
        validation sequence.

        Raises:
            ExtractorError: with ``expected=True`` when the login endpoint
                answers HTTP 401 and its JSON body carries a ``description``
                or ``code``; any other download failure is re-raised as is.
        """
        username, password = self._get_login_info()
        if username is None:
            return

        # URL-safe unreserved characters.  The previous literal skipped 'W'.
        alphabet = (
            '0123456789'
            'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
            'abcdefghijklmnopqrstuvwxyz'
            '-._~')
        # 'state' and 'nonce' are meant to be unguessable per-request values,
        # so draw them from the OS entropy source instead of the default
        # (predictable) Mersenne Twister generator.
        rng = random.SystemRandom()

        def random_string():
            return ''.join([rng.choice(alphabet) for _ in range(32)])

        webpage, urlh = self._download_webpage_handle(
            self._AUTHORIZE_URL, None, 'Downloading authorize page', query={
                'client_id': self._CLIENT_ID,
                'response_type': 'token id_token',
                'redirect_uri': self._ORIGIN_URL,
                'scope': 'openid email user_impersonation profile',
                'audience': self._ORIGIN_URL,
                'state': random_string(),
                'nonce': random_string(),
            })

        # The authorize page embeds a base64-encoded JSON blob inside an
        # atob("...") call; its 'extraParams' member seeds the login payload.
        login_data = self._parse_json(
            self._search_regex(
                r'atob\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
                'login info', group='value'), None,
            transform_source=lambda x: compat_b64decode(x).decode('utf-8')
        )['extraParams']

        login_data.update({
            'client_id': self._CLIENT_ID,
            'redirect_uri': self._ORIGIN_URL,
            'tenant': 'lacausers',
            'connection': 'Username-Password-Authentication',
            'username': username,
            'password': password,
            'sso': 'true',
        })

        # Final URL of the authorize request (after any redirects); sent as
        # the Referer of the two follow-up POSTs.
        login_state_url = urlh.geturl()

        try:
            login_page = self._download_webpage(
                'https://login.linuxacademy.com/usernamepassword/login', None,
                'Downloading login page', data=json.dumps(login_data).encode(),
                headers={
                    'Content-Type': 'application/json',
                    'Origin': 'https://login.linuxacademy.com',
                    'Referer': login_state_url,
                })
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
                # Decode explicitly (json.loads rejects bytes on older
                # Python 3) and parse leniently so that an unexpected body
                # cannot hide the original HTTP error behind a parse failure
                # or a KeyError.
                error = self._parse_json(
                    e.cause.read().decode('utf-8', 'replace'), None,
                    fatal=False)
                if isinstance(error, dict):
                    message = error.get('description') or error.get('code')
                    if message:
                        raise ExtractorError(
                            '%s said: %s' % (self.IE_NAME, message),
                            expected=True)
            raise

        # The login response is an HTML form of hidden inputs that has to be
        # re-posted to the callback endpoint.
        callback_page, urlh = self._download_webpage_handle(
            'https://login.linuxacademy.com/login/callback', None,
            'Downloading callback page',
            data=urlencode_postdata(self._hidden_inputs(login_page)),
            headers={
                'Content-Type': 'application/x-www-form-urlencoded',
                'Origin': 'https://login.linuxacademy.com',
                'Referer': login_state_url,
            })

        # The access token is read from the URL the callback finally lands on.
        access_token = self._search_regex(
            r'access_token=([^=&]+)', urlh.geturl(),
            'access token')

        self._download_webpage(
            'https://linuxacademy.com/cp/login/tokenValidateLogin/token/%s'
            % access_token, None, 'Downloading token validation page')

    def _real_extract(self, url):
        """Return a playlist result for a course URL, else a single video.

        Args:
            url: a URL matching ``_VALID_URL``.

        Returns:
            A playlist result (course) or an info dict built from the page's
            jwplayer data with ``id`` and ``title`` overridden (lesson).
        """
        mobj = re.match(self._VALID_URL, url)
        chapter_id, lesson_id, course_id = mobj.group(
            'chapter_id', 'lesson_id', 'course_id')
        item_id = course_id if course_id else '%s-%s' % (chapter_id, lesson_id)

        webpage = self._download_webpage(url, item_id)

        # course path
        if course_id:
            # orderedSet drops repeated lesson links while keeping page order.
            lesson_paths = orderedSet(re.findall(
                r'<a[^>]+\bhref=["\'](/cp/courses/lesson/course/\d+/lesson/\d+/module/\d+)',
                webpage))
            entries = [
                self.url_result(
                    urljoin(url, lesson_path), ie=LinuxAcademyIE.ie_key())
                for lesson_path in lesson_paths]
            title = unescapeHTML(self._html_search_regex(
                (r'class=["\']course-title["\'][^>]*>(?P<value>[^<]+)',
                 r'var\s+title\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'),
                webpage, 'title', default=None, group='value'))
            description = unescapeHTML(self._html_search_regex(
                r'var\s+description\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
                webpage, 'description', default=None, group='value'))
            return self.playlist_result(entries, course_id, title, description)

        # single video path
        info = self._extract_jwplayer_data(
            webpage, item_id, require_title=False, m3u8_id='hls')
        title = self._search_regex(
            (r'>Lecture\s*:\s*(?P<value>[^<]+)',
             r'lessonName\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
            'title', group='value')
        info.update({
            'id': item_id,
            'title': title,
        })
        return info
		Reference in New Issue
	
	Block a user
	 Unknown
					Unknown