mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 22:55:18 +00:00 
			
		
		
		
	[safari:api] Separate extractor (Closes #8871)
This commit is contained in:
		| @@ -628,6 +628,7 @@ from .ruutu import RuutuIE | |||||||
| from .sandia import SandiaIE | from .sandia import SandiaIE | ||||||
| from .safari import ( | from .safari import ( | ||||||
|     SafariIE, |     SafariIE, | ||||||
|  |     SafariApiIE, | ||||||
|     SafariCourseIE, |     SafariCourseIE, | ||||||
| ) | ) | ||||||
| from .sapo import SapoIE | from .sapo import SapoIE | ||||||
|   | |||||||
| @@ -75,16 +75,7 @@ class SafariBaseIE(InfoExtractor): | |||||||
| class SafariIE(SafariBaseIE): | class SafariIE(SafariBaseIE): | ||||||
|     IE_NAME = 'safari' |     IE_NAME = 'safari' | ||||||
|     IE_DESC = 'safaribooksonline.com online video' |     IE_DESC = 'safaribooksonline.com online video' | ||||||
|     _VALID_URL = r'''(?x)https?:// |     _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>part\d+)\.html' | ||||||
|                             (?:www\.)?safaribooksonline\.com/ |  | ||||||
|                                 (?: |  | ||||||
|                                     library/view/[^/]+| |  | ||||||
|                                     api/v1/book |  | ||||||
|                                 )/ |  | ||||||
|                                 (?P<course_id>[^/]+)/ |  | ||||||
|                                     (?:chapter(?:-content)?/)? |  | ||||||
|                                 (?P<part>part\d+)\.html |  | ||||||
|     ''' |  | ||||||
|  |  | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html', |         'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html', | ||||||
| @@ -97,9 +88,6 @@ class SafariIE(SafariBaseIE): | |||||||
|             'upload_date': '20150724', |             'upload_date': '20150724', | ||||||
|             'uploader_id': 'stork', |             'uploader_id': 'stork', | ||||||
|         }, |         }, | ||||||
|     }, { |  | ||||||
|         'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html', |  | ||||||
|         'only_matching': True, |  | ||||||
|     }, { |     }, { | ||||||
|         # non-digits in course id |         # non-digits in course id | ||||||
|         'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html', |         'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html', | ||||||
| @@ -108,13 +96,18 @@ class SafariIE(SafariBaseIE): | |||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         mobj = re.match(self._VALID_URL, url) |         mobj = re.match(self._VALID_URL, url) | ||||||
|         course_id = mobj.group('course_id') |         video_id = '%s/%s' % (mobj.group('course_id'), mobj.group('part')) | ||||||
|         part = mobj.group('part') |  | ||||||
|  |  | ||||||
|         webpage = self._download_webpage(url, '%s/%s' % (course_id, part)) |         webpage = self._download_webpage(url, video_id) | ||||||
|         reference_id = self._search_regex(r'data-reference-id="([^"]+)"', webpage, 'kaltura reference id') |         reference_id = self._search_regex( | ||||||
|         partner_id = self._search_regex(r'data-partner-id="([^"]+)"', webpage, 'kaltura widget id') |             r'data-reference-id=(["\'])(?P<id>.+?)\1', | ||||||
|         ui_id = self._search_regex(r'data-ui-id="([^"]+)"', webpage, 'kaltura uiconf id') |             webpage, 'kaltura reference id', group='id') | ||||||
|  |         partner_id = self._search_regex( | ||||||
|  |             r'data-partner-id=(["\'])(?P<id>.+?)\1', | ||||||
|  |             webpage, 'kaltura widget id', group='id') | ||||||
|  |         ui_id = self._search_regex( | ||||||
|  |             r'data-ui-id=(["\'])(?P<id>.+?)\1', | ||||||
|  |             webpage, 'kaltura uiconf id', group='id') | ||||||
|  |  | ||||||
|         query = { |         query = { | ||||||
|             'wid': '_%s' % partner_id, |             'wid': '_%s' % partner_id, | ||||||
| @@ -125,7 +118,7 @@ class SafariIE(SafariBaseIE): | |||||||
|         if self.LOGGED_IN: |         if self.LOGGED_IN: | ||||||
|             kaltura_session = self._download_json( |             kaltura_session = self._download_json( | ||||||
|                 '%s/player/kaltura_session/?reference_id=%s' % (self._API_BASE, reference_id), |                 '%s/player/kaltura_session/?reference_id=%s' % (self._API_BASE, reference_id), | ||||||
|                 course_id, 'Downloading kaltura session JSON', |                 video_id, 'Downloading kaltura session JSON', | ||||||
|                 'Unable to download kaltura session JSON', fatal=False) |                 'Unable to download kaltura session JSON', fatal=False) | ||||||
|             if kaltura_session: |             if kaltura_session: | ||||||
|                 session = kaltura_session.get('session') |                 session = kaltura_session.get('session') | ||||||
| @@ -137,6 +130,23 @@ class SafariIE(SafariBaseIE): | |||||||
|             'Kaltura') |             'Kaltura') | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class SafariApiIE(SafariBaseIE): | ||||||
|  |     IE_NAME = 'safari:api' | ||||||
|  |     _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>part\d+)\.html' | ||||||
|  |  | ||||||
|  |     _TEST = { | ||||||
|  |         'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html', | ||||||
|  |         'only_matching': True, | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         mobj = re.match(self._VALID_URL, url) | ||||||
|  |         part = self._download_json( | ||||||
|  |             url, '%s/%s' % (mobj.group('course_id'), mobj.group('part')), | ||||||
|  |             'Downloading part JSON') | ||||||
|  |         return self.url_result(part['web_url'], SafariIE.ie_key()) | ||||||
|  |  | ||||||
|  |  | ||||||
| class SafariCourseIE(SafariBaseIE): | class SafariCourseIE(SafariBaseIE): | ||||||
|     IE_NAME = 'safari:course' |     IE_NAME = 'safari:course' | ||||||
|     IE_DESC = 'safaribooksonline.com online courses' |     IE_DESC = 'safaribooksonline.com online courses' | ||||||
| @@ -168,7 +178,7 @@ class SafariCourseIE(SafariBaseIE): | |||||||
|                 'No chapters found for course %s' % course_id, expected=True) |                 'No chapters found for course %s' % course_id, expected=True) | ||||||
|  |  | ||||||
|         entries = [ |         entries = [ | ||||||
|             self.url_result(chapter, 'Safari') |             self.url_result(chapter, SafariApiIE.ie_key()) | ||||||
|             for chapter in course_json['chapters']] |             for chapter in course_json['chapters']] | ||||||
|  |  | ||||||
|         course_title = course_json['title'] |         course_title = course_json['title'] | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Sergey M․
					Sergey M․