mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[scte] Add extractor (closes #22975)
This commit is contained in:
		| @@ -980,6 +980,10 @@ from .sbs import SBSIE | |||||||
| from .screencast import ScreencastIE | from .screencast import ScreencastIE | ||||||
| from .screencastomatic import ScreencastOMaticIE | from .screencastomatic import ScreencastOMaticIE | ||||||
| from .scrippsnetworks import ScrippsNetworksWatchIE | from .scrippsnetworks import ScrippsNetworksWatchIE | ||||||
|  | from .scte import ( | ||||||
|  |     SCTEIE, | ||||||
|  |     SCTECourseIE, | ||||||
|  | ) | ||||||
| from .seeker import SeekerIE | from .seeker import SeekerIE | ||||||
| from .senateisvp import SenateISVPIE | from .senateisvp import SenateISVPIE | ||||||
| from .sendtonews import SendtoNewsIE | from .sendtonews import SendtoNewsIE | ||||||
|   | |||||||
							
								
								
									
										144
									
								
								youtube_dl/extractor/scte.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										144
									
								
								youtube_dl/extractor/scte.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,144 @@ | |||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | import re | ||||||
|  |  | ||||||
|  | from .common import InfoExtractor | ||||||
|  | from ..utils import ( | ||||||
|  |     decode_packed_codes, | ||||||
|  |     ExtractorError, | ||||||
|  |     urlencode_postdata, | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
class SCTEBaseIE(InfoExtractor):
    """Common base for the SCTE extractors.

    Performs the scte.org sign-in during initialization when login
    credentials are available.
    """

    _LOGIN_URL = 'https://www.scte.org/SCTE/Sign_In.aspx'
    # presumably consumed by InfoExtractor._get_login_info for .netrc
    # lookups; that helper is defined outside this file -- TODO confirm
    _NETRC_MACHINE = 'scte'

    def _real_initialize(self):
        self._login()

    def _login(self):
        """Sign in to scte.org using the configured credentials.

        Returns silently when no username is configured or when the sign-in
        page already shows a logged-in session.

        Raises ExtractorError when the response to the sign-in POST neither
        contains a page redirect marker nor looks logged in.
        """
        username, password = self._get_login_info()
        if username is None:
            return

        # Markup fragments that only show up for an authenticated session.
        logged_in_markers = (r'class=["\']welcome\b', r'>Sign Out<')

        def is_logged(page):
            for marker in logged_in_markers:
                if re.search(marker, page):
                    return True
            return False

        sign_in_page = self._download_webpage(
            self._LOGIN_URL, None, 'Downloading login popup')

        # already logged in
        if is_logged(sign_in_page):
            return

        # Start from the hidden inputs of the sign-in page and add the
        # credential fields on top of them.
        form_fields = self._hidden_inputs(sign_in_page)
        form_fields['ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInUserName'] = username
        form_fields['ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInPassword'] = password
        form_fields['ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$RememberMe'] = 'on'

        response = self._download_webpage(
            self._LOGIN_URL, None, 'Logging in',
            data=urlencode_postdata(form_fields))

        # Either a redirect marker or a logged-in page counts as success.
        if '|pageRedirect|' in response or is_logged(response):
            return

        error = self._html_search_regex(
            r'(?s)<[^>]+class=["\']AsiError["\'][^>]*>(.+?)</',
            response, 'error message', default=None)
        if error:
            # The site reported a reason; surface it as an expected error.
            raise ExtractorError('Unable to login: %s' % error, expected=True)
        raise ExtractorError('Unable to log in')
|  |  | ||||||
|  |  | ||||||
class SCTEIE(SCTEBaseIE):
    """Extractor for a single SCORM module page on learning.scte.org.

    Produces a playlist with one entry per ``.mp4`` asset listed in the
    module's packed ``mobile/data.js`` manifest.
    """

    # The "?" opening the query string is escaped so that it is matched
    # literally rather than acting as a quantifier on the preceding "p".
    _VALID_URL = r'https?://learning\.scte\.org/mod/scorm/view\.php\?.*?\bid=(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://learning.scte.org/mod/scorm/view.php?id=31484',
        'info_dict': {
            'title': 'Introduction to DOCSIS Engineering Professional',
            'id': '31484',
        },
        'playlist_count': 5,
        'skip': 'Requires account credentials',
    }]

    def _real_extract(self, url):
        """Build a playlist from the video assets of the module at *url*.

        Assets without a URL, without an ``.mp4`` suffix, or whose URL does
        not carry a ``video_<id>_`` component are skipped.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        title = self._search_regex(r'<h1>(.+?)</h1>', webpage, 'title')

        # The third argument is the human-readable field name used in the
        # failure message, so it must describe the field, not carry the id.
        context_id = self._search_regex(
            r'context-(\d+)', webpage, 'context id')
        # NOTE(review): the "8" path component is hard-coded; its meaning is
        # not visible from this file -- confirm it holds for every module.
        content_base = 'https://learning.scte.org/pluginfile.php/%s/mod_scorm/content/8/' % context_id
        # data.js is shipped packed; unpack it before searching it.
        context = decode_packed_codes(self._download_webpage(
            '%smobile/data.js' % content_base, video_id))

        # The manifest is an XML document passed as a string literal to
        # CreateData(); escaped single quotes are restored before parsing.
        data = self._parse_xml(
            self._search_regex(
                r'CreateData\(\s*"(.+?)"', context, 'data').replace(r"\'", "'"),
            video_id)

        entries = []
        for asset in data.findall('.//asset'):
            asset_url = asset.get('url')
            if not asset_url or not asset_url.endswith('.mp4'):
                continue
            asset_id = self._search_regex(
                r'video_([^_]+)_', asset_url, 'asset id', default=None)
            if not asset_id:
                continue
            entries.append({
                'id': asset_id,
                'title': title,
                # Asset URLs are joined onto the module content base.
                'url': content_base + asset_url,
            })

        return self.playlist_result(entries, video_id, title)
|  |  | ||||||
|  |  | ||||||
class SCTECourseIE(SCTEBaseIE):
    """Extractor for course and subcourse pages on learning.scte.org.

    Produces a playlist that delegates every linked SCORM module to SCTEIE
    and every linked subcourse back to this extractor.
    """

    # The "?" opening the query string is escaped so that it is matched
    # literally rather than acting as a quantifier on the preceding "p".
    _VALID_URL = r'https?://learning\.scte\.org/(?:mod/sub)?course/view\.php\?.*?\bid=(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://learning.scte.org/mod/subcourse/view.php?id=31491',
        'only_matching': True,
    }, {
        'url': 'https://learning.scte.org/course/view.php?id=3639',
        'only_matching': True,
    }, {
        'url': 'https://learning.scte.org/course/view.php?id=3073',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Collect the module and subcourse links found on the page at *url*.

        The title is optional: when no ``<h1>`` is found the playlist is
        returned with a title of None.
        """
        course_id = self._match_id(url)

        webpage = self._download_webpage(url, course_id)

        title = self._search_regex(
            r'<h1>(.+?)</h1>', webpage, 'title', default=None)

        entries = []
        # Scan anchors whose href points at a scorm module or a subcourse.
        # "(?:(?!\1).)*?" consumes query-string characters without running
        # past the quote character that opened the attribute value.
        for mobj in re.finditer(
                r'''(?x)
                    <a[^>]+
                        href=(["\'])
                        (?P<url>
                            https?://learning\.scte\.org/mod/
                            (?P<kind>scorm|subcourse)/view\.php\?(?:(?!\1).)*?
                            \bid=\d+
                        )
                    ''',
                webpage):
            item_url = mobj.group('url')
            # Skip a link back to the page currently being extracted.
            if item_url == url:
                continue
            ie = (SCTEIE.ie_key() if mobj.group('kind') == 'scorm'
                  else SCTECourseIE.ie_key())
            entries.append(self.url_result(item_url, ie=ie))

        return self.playlist_result(entries, course_id, title)
		Reference in New Issue
	
	Block a user
	 Sergey M․
					Sergey M․