Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2025-10-30 22:25:19 +00:00)
			
		
		
		
	[khanacademy] Add support (Fixes #2066)
This commit is contained in:
		| @@ -1,7 +1,6 @@ | |||||||
| #!/usr/bin/env python | #!/usr/bin/env python | ||||||
| # encoding: utf-8 | # encoding: utf-8 | ||||||
|  |  | ||||||
|  |  | ||||||
| # Allow direct execution | # Allow direct execution | ||||||
| import os | import os | ||||||
| import sys | import sys | ||||||
| @@ -30,6 +29,7 @@ from youtube_dl.extractor import ( | |||||||
|     SmotriUserIE, |     SmotriUserIE, | ||||||
|     IviCompilationIE, |     IviCompilationIE, | ||||||
|     ImdbListIE, |     ImdbListIE, | ||||||
|  |     KhanAcademyIE, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -198,6 +198,16 @@ class TestPlaylists(unittest.TestCase): | |||||||
|         self.assertEqual(result['title'], u'Animated and Family Films') |         self.assertEqual(result['title'], u'Animated and Family Films') | ||||||
|         self.assertTrue(len(result['entries']) >= 48) |         self.assertTrue(len(result['entries']) >= 48) | ||||||
|  |  | ||||||
|  |     def test_khanacademy_topic(self): | ||||||
|  |         dl = FakeYDL() | ||||||
|  |         ie = KhanAcademyIE(dl) | ||||||
|  |         result = ie.extract('https://www.khanacademy.org/math/applied-math/cryptography') | ||||||
|  |         self.assertIsPlaylist(result) | ||||||
|  |         self.assertEqual(result['id'], u'cryptography') | ||||||
|  |         self.assertEqual(result['title'], u'Journey into cryptography') | ||||||
|  |         self.assertEqual(result['description'], u'How have humans protected their secret messages through history? What has changed today?') | ||||||
|  |         self.assertTrue(len(result['entries']) >= 3) | ||||||
|  |  | ||||||
|  |  | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|     unittest.main() |     unittest.main() | ||||||
|   | |||||||
| @@ -98,6 +98,7 @@ from .justintv import JustinTVIE | |||||||
| from .jpopsukitv import JpopsukiIE | from .jpopsukitv import JpopsukiIE | ||||||
| from .kankan import KankanIE | from .kankan import KankanIE | ||||||
| from .keezmovies import KeezMoviesIE | from .keezmovies import KeezMoviesIE | ||||||
|  | from .khanacademy import KhanAcademyIE | ||||||
| from .kickstarter import KickStarterIE | from .kickstarter import KickStarterIE | ||||||
| from .keek import KeekIE | from .keek import KeekIE | ||||||
| from .liveleak import LiveLeakIE | from .liveleak import LiveLeakIE | ||||||
|   | |||||||
| @@ -1,4 +1,5 @@ | |||||||
| import base64 | import base64 | ||||||
|  | import json | ||||||
| import os | import os | ||||||
| import re | import re | ||||||
| import socket | import socket | ||||||
| @@ -260,6 +261,15 @@ class InfoExtractor(object): | |||||||
|             xml_string = transform_source(xml_string) |             xml_string = transform_source(xml_string) | ||||||
|         return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8')) |         return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8')) | ||||||
|  |  | ||||||
|  |     def _download_json(self, url_or_request, video_id, | ||||||
|  |                        note=u'Downloading JSON metadata', | ||||||
|  |                        errnote=u'Unable to download JSON metadata'): | ||||||
|  |         json_string = self._download_webpage(url_or_request, video_id, note, errnote) | ||||||
|  |         try: | ||||||
|  |             return json.loads(json_string) | ||||||
|  |         except ValueError as ve: | ||||||
|  |             raise ExtractorError('Failed to download JSON', cause=ve) | ||||||
|  |  | ||||||
|     def report_warning(self, msg, video_id=None): |     def report_warning(self, msg, video_id=None): | ||||||
|         idstr = u'' if video_id is None else u'%s: ' % video_id |         idstr = u'' if video_id is None else u'%s: ' % video_id | ||||||
|         self._downloader.report_warning( |         self._downloader.report_warning( | ||||||
|   | |||||||
							
								
								
									
youtube_dl/extractor/khanacademy.py (71 lines changed, normal file)
									
								
							| @@ -0,0 +1,71 @@ | |||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | import re | ||||||
|  |  | ||||||
|  | from .common import InfoExtractor | ||||||
|  | from ..utils import ( | ||||||
|  |     unified_strdate, | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class KhanAcademyIE(InfoExtractor): | ||||||
|  |     _VALID_URL = r'^https?://(?:www\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])' | ||||||
|  |     IE_NAME = 'KhanAcademy' | ||||||
|  |  | ||||||
|  |     _TEST = { | ||||||
|  |         'url': 'http://www.khanacademy.org/video/one-time-pad', | ||||||
|  |         'file': 'one-time-pad.mp4', | ||||||
|  |         'md5': '7021db7f2d47d4fff89b13177cb1e8f4', | ||||||
|  |         'info_dict': { | ||||||
|  |             'title': 'The one-time pad', | ||||||
|  |             'description': 'The perfect cipher', | ||||||
|  |             'duration': 176, | ||||||
|  |             'uploader': 'Brit Cruise', | ||||||
|  |             'upload_date': '20120411', | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         m = re.match(self._VALID_URL, url) | ||||||
|  |         video_id = m.group('id') | ||||||
|  |  | ||||||
|  |         if m.group('key') == 'video': | ||||||
|  |             data = self._download_json( | ||||||
|  |                 'http://api.khanacademy.org/api/v1/videos/' + video_id, | ||||||
|  |                 video_id, 'Downloading video info') | ||||||
|  |  | ||||||
|  |             upload_date = unified_strdate(data['date_added']) | ||||||
|  |             uploader = ', '.join(data['author_names']) | ||||||
|  |             return { | ||||||
|  |                 '_type': 'url_transparent', | ||||||
|  |                 'url': data['url'], | ||||||
|  |                 'id': video_id, | ||||||
|  |                 'title': data['title'], | ||||||
|  |                 'thumbnail': data['image_url'], | ||||||
|  |                 'duration': data['duration'], | ||||||
|  |                 'description': data['description'], | ||||||
|  |                 'uploader': uploader, | ||||||
|  |                 'upload_date': upload_date, | ||||||
|  |             } | ||||||
|  |         else: | ||||||
|  |             # topic | ||||||
|  |             data = self._download_json( | ||||||
|  |                 'http://api.khanacademy.org/api/v1/topic/' + video_id, | ||||||
|  |                 video_id, 'Downloading topic info') | ||||||
|  |  | ||||||
|  |             entries = [ | ||||||
|  |                 { | ||||||
|  |                     '_type': 'url', | ||||||
|  |                     'url': c['url'], | ||||||
|  |                     'id': c['id'], | ||||||
|  |                     'title': c['title'], | ||||||
|  |                 } | ||||||
|  |                 for c in data['children'] if c['kind'] in ('Video', 'Topic')] | ||||||
|  |  | ||||||
|  |             return { | ||||||
|  |                 '_type': 'playlist', | ||||||
|  |                 'id': video_id, | ||||||
|  |                 'title': data['title'], | ||||||
|  |                 'description': data['description'], | ||||||
|  |                 'entries': entries, | ||||||
|  |             } | ||||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister