mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	[internetvideoarchive] Fix extraction and support json URLs
This commit is contained in:
		| @@ -1,93 +1,91 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_parse_qs, | ||||
|     compat_urlparse, | ||||
|     compat_urllib_parse_urlencode, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     xpath_with_ns, | ||||
|     determine_ext, | ||||
|     int_or_none, | ||||
|     xpath_text, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class InternetVideoArchiveIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://video\.internetvideoarchive\.net/flash/players/.*?\?.*?publishedid.*?' | ||||
|     _VALID_URL = r'https?://video\.internetvideoarchive\.net/(?:player|flash/players)/.*?\?.*?publishedid.*?' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?customerid=69249&publishedid=452693&playerid=247', | ||||
|         'url': 'http://video.internetvideoarchive.net/player/6/configuration.ashx?customerid=69249&publishedid=194487&reporttag=vdbetatitle&playerid=641&autolist=0&domain=www.videodetective.com&maxrate=high&minrate=low&socialplayer=false', | ||||
|         'info_dict': { | ||||
|             'id': '452693', | ||||
|             'id': '194487', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'SKYFALL', | ||||
|             'description': 'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.', | ||||
|             'duration': 152, | ||||
|             'title': 'KICK-ASS 2', | ||||
|             'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a', | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     @staticmethod | ||||
|     def _build_url(query): | ||||
|         return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query | ||||
|     def _build_json_url(query): | ||||
|         return 'http://video.internetvideoarchive.net/player/6/configuration.ashx?' + query | ||||
|  | ||||
|     @staticmethod | ||||
|     def _clean_query(query): | ||||
|         NEEDED_ARGS = ['publishedid', 'customerid'] | ||||
|         query_dic = compat_urlparse.parse_qs(query) | ||||
|         cleaned_dic = dict((k, v[0]) for (k, v) in query_dic.items() if k in NEEDED_ARGS) | ||||
|         # Other player ids return m3u8 urls | ||||
|         cleaned_dic['playerid'] = '247' | ||||
|         cleaned_dic['videokbrate'] = '100000' | ||||
|         return compat_urllib_parse_urlencode(cleaned_dic) | ||||
|     def _build_xml_url(query): | ||||
|         return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         query = compat_urlparse.urlparse(url).query | ||||
|         query_dic = compat_urlparse.parse_qs(query) | ||||
|         query_dic = compat_parse_qs(query) | ||||
|         video_id = query_dic['publishedid'][0] | ||||
|         url = self._build_url(query) | ||||
|  | ||||
|         flashconfiguration = self._download_xml(url, video_id, | ||||
|                                                 'Downloading flash configuration') | ||||
|         file_url = flashconfiguration.find('file').text | ||||
|         file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx') | ||||
|         # Replace some of the parameters in the query to get the best quality | ||||
|         # and http links (no m3u8 manifests) | ||||
|         file_url = re.sub(r'(?<=\?)(.+)$', | ||||
|                           lambda m: self._clean_query(m.group()), | ||||
|                           file_url) | ||||
|         info = self._download_xml(file_url, video_id, | ||||
|                                   'Downloading video info') | ||||
|         item = info.find('channel/item') | ||||
|         if '/player/' in url: | ||||
|             configuration = self._download_json(url, video_id) | ||||
|  | ||||
|         def _bp(p): | ||||
|             return xpath_with_ns( | ||||
|                 p, | ||||
|                 { | ||||
|                     'media': 'http://search.yahoo.com/mrss/', | ||||
|                     'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats', | ||||
|                 } | ||||
|             ) | ||||
|         formats = [] | ||||
|         for content in item.findall(_bp('media:group/media:content')): | ||||
|             attr = content.attrib | ||||
|             f_url = attr['url'] | ||||
|             width = int(attr['width']) | ||||
|             bitrate = int(attr['bitrate']) | ||||
|             format_id = '%d-%dk' % (width, bitrate) | ||||
|             formats.append({ | ||||
|                 'format_id': format_id, | ||||
|                 'url': f_url, | ||||
|                 'width': width, | ||||
|                 'tbr': bitrate, | ||||
|             }) | ||||
|             # There are multiple videos in the playlist while only the first one | ||||
|             # matches the video played in browsers | ||||
|             video_info = configuration['playlist'][0] | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|             formats = [] | ||||
|             for source in video_info['sources']: | ||||
|                 file_url = source['file'] | ||||
|                 if determine_ext(file_url) == 'm3u8': | ||||
|                     formats.extend(self._extract_m3u8_formats( | ||||
|                         file_url, video_id, ext='mp4', m3u8_id='hls')) | ||||
|                 else: | ||||
|                     a_format = { | ||||
|                         'url': file_url, | ||||
|                     } | ||||
|  | ||||
|                     if source.get('label') and source['label'][-4:] == ' kbs': | ||||
|                         tbr = int_or_none(source['label'][:-4]) | ||||
|                         a_format.update({ | ||||
|                             'tbr': tbr, | ||||
|                             'format_id': 'http-%d' % tbr, | ||||
|                         }) | ||||
|                         formats.append(a_format) | ||||
|  | ||||
|             self._sort_formats(formats) | ||||
|  | ||||
|             title = video_info['title'] | ||||
|             description = video_info.get('description') | ||||
|             thumbnail = video_info.get('image') | ||||
|         else: | ||||
|             configuration = self._download_xml(url, video_id) | ||||
|             formats = [{ | ||||
|                 'url': xpath_text(configuration, './file', 'file URL', fatal=True), | ||||
|             }] | ||||
|             thumbnail = xpath_text(configuration, './image', 'thumbnail') | ||||
|             title = 'InternetVideoArchive video %s' % video_id | ||||
|             description = None | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': item.find('title').text, | ||||
|             'title': title, | ||||
|             'formats': formats, | ||||
|             'thumbnail': item.find(_bp('media:thumbnail')).attrib['url'], | ||||
|             'description': item.find('description').text, | ||||
|             'duration': int(attr['duration']), | ||||
|             'thumbnail': thumbnail, | ||||
|             'description': description, | ||||
|         } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Yen Chi Hsuan
					Yen Chi Hsuan