Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2025-10-30 22:25:19 +00:00)
	[udn] Add new extractor
This commit is contained in:
		| @@ -53,6 +53,7 @@ from youtube_dl.utils import ( | ||||
|     uppercase_escape, | ||||
|     url_basename, | ||||
|     urlencode_postdata, | ||||
|     url_infer_protocol, | ||||
|     version_tuple, | ||||
|     xpath_with_ns, | ||||
|     xpath_text, | ||||
| @@ -296,6 +297,10 @@ class TestUtil(unittest.TestCase): | ||||
|             url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'), | ||||
|             'trailer.mp4') | ||||
|  | ||||
|     def test_url_infer_protocol(self): | ||||
|         self.assertEqual(url_infer_protocol('http://foo.com/', '//bar.com/'), 'http://bar.com/') | ||||
|         self.assertEqual(url_infer_protocol('http://foo.com/', 'https://bar.com/'), 'https://bar.com/') | ||||
|  | ||||
|     def test_parse_duration(self): | ||||
|         self.assertEqual(parse_duration(None), None) | ||||
|         self.assertEqual(parse_duration(False), None) | ||||
|   | ||||
| @@ -557,6 +557,7 @@ from .udemy import ( | ||||
|     UdemyIE, | ||||
|     UdemyCourseIE | ||||
| ) | ||||
| from .udn import UDNEmbedIE | ||||
| from .ultimedia import UltimediaIE | ||||
| from .unistra import UnistraIE | ||||
| from .urort import UrortIE | ||||
|   | ||||
| @@ -26,6 +26,7 @@ from ..utils import ( | ||||
|     unsmuggle_url, | ||||
|     UnsupportedError, | ||||
|     url_basename, | ||||
|     url_infer_protocol, | ||||
|     xpath_text, | ||||
| ) | ||||
| from .brightcove import BrightcoveIE | ||||
| @@ -34,6 +35,7 @@ from .ooyala import OoyalaIE | ||||
| from .rutv import RUTVIE | ||||
| from .smotri import SmotriIE | ||||
| from .condenast import CondeNastIE | ||||
| from .udn import UDNEmbedIE | ||||
|  | ||||
|  | ||||
| class GenericIE(InfoExtractor): | ||||
| @@ -650,6 +652,17 @@ class GenericIE(InfoExtractor): | ||||
|                 'title': "PFT Live: New leader in the 'new-look' defense", | ||||
|                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e', | ||||
|             }, | ||||
|         }, | ||||
|         # UDN embed | ||||
|         { | ||||
|             'url': 'http://www.udn.com/news/story/7314/822787', | ||||
|             'md5': 'de06b4c90b042c128395a88f0384817e', | ||||
|             'info_dict': { | ||||
|                 'id': '300040', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': '生物老師男變女 全校挺"做自己"', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             } | ||||
|         } | ||||
|     ] | ||||
|  | ||||
| @@ -1268,6 +1281,13 @@ class GenericIE(InfoExtractor): | ||||
|         if nbc_sports_url: | ||||
|             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer') | ||||
|  | ||||
|         # Look for UDN embeds | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result( | ||||
|                 url_infer_protocol(url, mobj.group('url')), 'UDNEmbed') | ||||
|  | ||||
|         def check_video(vurl): | ||||
|             if YoutubeIE.suitable(vurl): | ||||
|                 return True | ||||
|   | ||||
							
								
								
									
								youtube_dl/extractor/udn.py (new file, 66 lines added, normal file mode)
									
								
							| @@ -0,0 +1,66 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     url_infer_protocol, | ||||
|     js_to_json | ||||
| ) | ||||
|  | ||||
|  | ||||
| class UDNEmbedIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:https?:)?//video\.udn\.com/embed/news/(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://video.udn.com/embed/news/300040', | ||||
|         'md5': 'de06b4c90b042c128395a88f0384817e', | ||||
|         'info_dict': { | ||||
|             'id': '300040', | ||||
|             'ext': 'mp4', | ||||
|             'title': '生物老師男變女 全校挺"做自己"', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|         } | ||||
|     }, { | ||||
|         'url': '//video.udn.com/embed/news/300040', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         page = self._download_webpage(url, video_id) | ||||
|  | ||||
|         options = json.loads(js_to_json(self._html_search_regex( | ||||
|             r'var options\s*=\s*([^;]+);', page, 'video urls dictionary'))) | ||||
|  | ||||
|         video_urls = options['video'] | ||||
|  | ||||
|         if video_urls.get('youtube'): | ||||
|             return self.url_result(video_urls.get('youtube'), 'Youtube') | ||||
|  | ||||
|         try: | ||||
|             del video_urls['youtube'] | ||||
|         except KeyError: | ||||
|             pass | ||||
|  | ||||
|         formats = [{ | ||||
|             'url': self._download_webpage( | ||||
|                 url_infer_protocol(url, api_url), video_id, | ||||
|                 'retrieve url for %s video' % video_type), | ||||
|             'format_id': video_type, | ||||
|             'preference': 0 if video_type == 'mp4' else -1, | ||||
|         } for video_type, api_url in video_urls.items()] | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         thumbnail = None | ||||
|  | ||||
|         if options.get('gallery') and len(options['gallery']): | ||||
|             thumbnail = options['gallery'][0].get('original') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'formats': formats, | ||||
|             'title': options['title'], | ||||
|             'thumbnail': thumbnail | ||||
|         } | ||||
| @@ -1711,6 +1711,17 @@ def determine_protocol(info_dict): | ||||
|     return compat_urllib_parse_urlparse(url).scheme | ||||
|  | ||||
|  | ||||
| def url_infer_protocol(ref_url, target_url): | ||||
|     """ Infer protocol for protocol independent target urls """ | ||||
|     parsed_target_url = list(compat_urllib_parse_urlparse(target_url)) | ||||
|     if parsed_target_url[0]: | ||||
|         return target_url | ||||
|  | ||||
|     parsed_target_url[0] = compat_urllib_parse_urlparse(ref_url).scheme | ||||
|  | ||||
|     return compat_urlparse.urlunparse(parsed_target_url) | ||||
|  | ||||
|  | ||||
| def render_table(header_row, data): | ||||
|     """ Render a list of rows, each as a list of values """ | ||||
|     table = [header_row] + data | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Yen Chi Hsuan
					Yen Chi Hsuan