mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-30 22:25:19 +00:00 
			
		
		
		
	[udn] Add new extractor
This commit is contained in:
		| @@ -53,6 +53,7 @@ from youtube_dl.utils import ( | |||||||
|     uppercase_escape, |     uppercase_escape, | ||||||
|     url_basename, |     url_basename, | ||||||
|     urlencode_postdata, |     urlencode_postdata, | ||||||
|  |     url_infer_protocol, | ||||||
|     version_tuple, |     version_tuple, | ||||||
|     xpath_with_ns, |     xpath_with_ns, | ||||||
|     xpath_text, |     xpath_text, | ||||||
| @@ -296,6 +297,10 @@ class TestUtil(unittest.TestCase): | |||||||
|             url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'), |             url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'), | ||||||
|             'trailer.mp4') |             'trailer.mp4') | ||||||
|  |  | ||||||
|  |     def test_url_infer_protocol(self): | ||||||
|  |         self.assertEqual(url_infer_protocol('http://foo.com/', '//bar.com/'), 'http://bar.com/') | ||||||
|  |         self.assertEqual(url_infer_protocol('http://foo.com/', 'https://bar.com/'), 'https://bar.com/') | ||||||
|  |  | ||||||
|     def test_parse_duration(self): |     def test_parse_duration(self): | ||||||
|         self.assertEqual(parse_duration(None), None) |         self.assertEqual(parse_duration(None), None) | ||||||
|         self.assertEqual(parse_duration(False), None) |         self.assertEqual(parse_duration(False), None) | ||||||
|   | |||||||
| @@ -557,6 +557,7 @@ from .udemy import ( | |||||||
|     UdemyIE, |     UdemyIE, | ||||||
|     UdemyCourseIE |     UdemyCourseIE | ||||||
| ) | ) | ||||||
|  | from .udn import UDNEmbedIE | ||||||
| from .ultimedia import UltimediaIE | from .ultimedia import UltimediaIE | ||||||
| from .unistra import UnistraIE | from .unistra import UnistraIE | ||||||
| from .urort import UrortIE | from .urort import UrortIE | ||||||
|   | |||||||
| @@ -26,6 +26,7 @@ from ..utils import ( | |||||||
|     unsmuggle_url, |     unsmuggle_url, | ||||||
|     UnsupportedError, |     UnsupportedError, | ||||||
|     url_basename, |     url_basename, | ||||||
|  |     url_infer_protocol, | ||||||
|     xpath_text, |     xpath_text, | ||||||
| ) | ) | ||||||
| from .brightcove import BrightcoveIE | from .brightcove import BrightcoveIE | ||||||
| @@ -34,6 +35,7 @@ from .ooyala import OoyalaIE | |||||||
| from .rutv import RUTVIE | from .rutv import RUTVIE | ||||||
| from .smotri import SmotriIE | from .smotri import SmotriIE | ||||||
| from .condenast import CondeNastIE | from .condenast import CondeNastIE | ||||||
|  | from .udn import UDNEmbedIE | ||||||
|  |  | ||||||
|  |  | ||||||
| class GenericIE(InfoExtractor): | class GenericIE(InfoExtractor): | ||||||
| @@ -650,6 +652,17 @@ class GenericIE(InfoExtractor): | |||||||
|                 'title': "PFT Live: New leader in the 'new-look' defense", |                 'title': "PFT Live: New leader in the 'new-look' defense", | ||||||
|                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e', |                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e', | ||||||
|             }, |             }, | ||||||
|  |         }, | ||||||
|  |         # UDN embed | ||||||
|  |         { | ||||||
|  |             'url': 'http://www.udn.com/news/story/7314/822787', | ||||||
|  |             'md5': 'de06b4c90b042c128395a88f0384817e', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '300040', | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'title': '生物老師男變女 全校挺"做自己"', | ||||||
|  |                 'thumbnail': 're:^https?://.*\.jpg$', | ||||||
|  |             } | ||||||
|         } |         } | ||||||
|     ] |     ] | ||||||
|  |  | ||||||
| @@ -1268,6 +1281,13 @@ class GenericIE(InfoExtractor): | |||||||
|         if nbc_sports_url: |         if nbc_sports_url: | ||||||
|             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer') |             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer') | ||||||
|  |  | ||||||
|  |         # Look for UDN embeds | ||||||
|  |         mobj = re.search( | ||||||
|  |             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage) | ||||||
|  |         if mobj is not None: | ||||||
|  |             return self.url_result( | ||||||
|  |                 url_infer_protocol(url, mobj.group('url')), 'UDNEmbed') | ||||||
|  |  | ||||||
|         def check_video(vurl): |         def check_video(vurl): | ||||||
|             if YoutubeIE.suitable(vurl): |             if YoutubeIE.suitable(vurl): | ||||||
|                 return True |                 return True | ||||||
|   | |||||||
							
								
								
									
										66
									
								
								youtube_dl/extractor/udn.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										66
									
								
								youtube_dl/extractor/udn.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,66 @@ | |||||||
|  | # coding: utf-8 | ||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | import json | ||||||
|  | from .common import InfoExtractor | ||||||
|  | from ..utils import ( | ||||||
|  |     url_infer_protocol, | ||||||
|  |     js_to_json | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
class UDNEmbedIE(InfoExtractor):
    """Extractor for video.udn.com embed pages (/embed/news/<id>)."""

    # The scheme is optional so that protocol-relative addresses
    # ('//video.udn.com/...') match as well.
    _VALID_URL = r'(?:https?:)?//video\.udn\.com/embed/news/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://video.udn.com/embed/news/300040',
        'md5': 'de06b4c90b042c128395a88f0384817e',
        'info_dict': {
            'id': '300040',
            'ext': 'mp4',
            'title': '生物老師男變女 全校挺"做自己"',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }, {
        'url': '//video.udn.com/embed/news/300040',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the embed page at url.

        The page defines a JavaScript object ``var options = {...};`` whose
        'video' entry maps a format name to an API URL.  Downloading that
        API URL returns the actual media URL for the format.  When a
        non-empty 'youtube' entry is present the extraction is delegated to
        the Youtube extractor instead.
        """
        video_id = self._match_id(url)

        page = self._download_webpage(url, video_id)

        options = json.loads(js_to_json(self._html_search_regex(
            r'var options\s*=\s*([^;]+);', page, 'video urls dictionary')))

        video_urls = options['video']

        # Take 'youtube' out of the mapping in every case: it is never an
        # API URL, so it must not end up in the formats list below.
        youtube_url = video_urls.pop('youtube', None)
        if youtube_url:
            return self.url_result(youtube_url, 'Youtube')

        formats = [{
            # The body of the API response is used as the media URL.
            'url': self._download_webpage(
                url_infer_protocol(url, api_url), video_id,
                'retrieve url for %s video' % video_type),
            'format_id': video_type,
            # Rank mp4 above every other format type.
            'preference': 0 if video_type == 'mp4' else -1,
        } for video_type, api_url in video_urls.items()]

        self._sort_formats(formats)

        # The thumbnail is the 'original' image of the first gallery entry,
        # when the page provides a non-empty gallery.
        gallery = options.get('gallery')
        thumbnail = gallery[0].get('original') if gallery else None

        return {
            'id': video_id,
            'formats': formats,
            'title': options['title'],
            'thumbnail': thumbnail
        }
| @@ -1711,6 +1711,17 @@ def determine_protocol(info_dict): | |||||||
|     return compat_urllib_parse_urlparse(url).scheme |     return compat_urllib_parse_urlparse(url).scheme | ||||||
|  |  | ||||||
|  |  | ||||||
def url_infer_protocol(ref_url, target_url):
    """Return target_url with a scheme, borrowing it from ref_url if needed.

    A target that already carries a scheme is returned unchanged.  A
    protocol-relative target ('//host/path') receives the scheme of
    ref_url.  A target with neither scheme nor host (e.g. '/path') is
    resolved against ref_url as a relative reference, because grafting
    only a scheme onto it would not produce a usable URL.
    """
    parsed_target_url = compat_urllib_parse_urlparse(target_url)
    if parsed_target_url.scheme:
        return target_url

    if not parsed_target_url.netloc:
        # No host to attach a scheme to: treat it as a relative reference.
        return compat_urlparse.urljoin(ref_url, target_url)

    # Protocol-relative target: keep every component, fill in the scheme.
    components = list(parsed_target_url)
    components[0] = compat_urllib_parse_urlparse(ref_url).scheme
    return compat_urlparse.urlunparse(components)
|  |  | ||||||
|  |  | ||||||
| def render_table(header_row, data): | def render_table(header_row, data): | ||||||
|     """ Render a list of rows, each as a list of values """ |     """ Render a list of rows, each as a list of values """ | ||||||
|     table = [header_row] + data |     table = [header_row] + data | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Yen Chi Hsuan
					Yen Chi Hsuan