mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	Add an extractor for rottentomatoes.com and improve InternetVideoArchiveIE to get the best quality
This commit is contained in:
		| @@ -94,6 +94,7 @@ from .rbmaradio import RBMARadioIE | |||||||
| from .redtube import RedTubeIE | from .redtube import RedTubeIE | ||||||
| from .ringtv import RingTVIE | from .ringtv import RingTVIE | ||||||
| from .ro220 import Ro220IE | from .ro220 import Ro220IE | ||||||
|  | from .rottentomatoes import RottenTomatoesIE | ||||||
| from .roxwel import RoxwelIE | from .roxwel import RoxwelIE | ||||||
| from .rtlnow import RTLnowIE | from .rtlnow import RTLnowIE | ||||||
| from .sina import SinaIE | from .sina import SinaIE | ||||||
|   | |||||||
| @@ -4,6 +4,7 @@ import xml.etree.ElementTree | |||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     compat_urlparse, |     compat_urlparse, | ||||||
|  |     compat_urllib_parse, | ||||||
|     xpath_with_ns, |     xpath_with_ns, | ||||||
|     determine_ext, |     determine_ext, | ||||||
| ) | ) | ||||||
| @@ -26,6 +27,16 @@ class InternetVideoArchiveIE(InfoExtractor): | |||||||
|     def _build_url(query): |     def _build_url(query): | ||||||
|         return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query |         return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query | ||||||
|  |  | ||||||
|  |     @staticmethod | ||||||
|  |     def _clean_query(query): | ||||||
|  |         NEEDED_ARGS = ['publishedid', 'customerid'] | ||||||
|  |         query_dic = compat_urlparse.parse_qs(query) | ||||||
|  |         cleaned_dic = dict((k,v[0]) for (k,v) in query_dic.items() if k in NEEDED_ARGS) | ||||||
|  |         # Other player ids return m3u8 urls | ||||||
|  |         cleaned_dic['playerid'] = '247' | ||||||
|  |         cleaned_dic['videokbrate'] = '100000' | ||||||
|  |         return compat_urllib_parse.urlencode(cleaned_dic) | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         query = compat_urlparse.urlparse(url).query |         query = compat_urlparse.urlparse(url).query | ||||||
|         query_dic = compat_urlparse.parse_qs(query) |         query_dic = compat_urlparse.parse_qs(query) | ||||||
| @@ -37,6 +48,11 @@ class InternetVideoArchiveIE(InfoExtractor): | |||||||
|         flashconfiguration = xml.etree.ElementTree.fromstring(flashconfiguration_xml.encode('utf-8')) |         flashconfiguration = xml.etree.ElementTree.fromstring(flashconfiguration_xml.encode('utf-8')) | ||||||
|         file_url = flashconfiguration.find('file').text |         file_url = flashconfiguration.find('file').text | ||||||
|         file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx') |         file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx') | ||||||
|  |         # Replace some of the parameters in the query to get the best quality | ||||||
|  |         # and http links (no m3u8 manifests) | ||||||
|  |         file_url = re.sub(r'(?<=\?)(.+)$', | ||||||
|  |             lambda m: self._clean_query(m.group()), | ||||||
|  |             file_url) | ||||||
|         info_xml = self._download_webpage(file_url, video_id, |         info_xml = self._download_webpage(file_url, video_id, | ||||||
|             u'Downloading video info') |             u'Downloading video info') | ||||||
|         info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8')) |         info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8')) | ||||||
|   | |||||||
							
								
								
									
										16
									
								
								youtube_dl/extractor/rottentomatoes.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								youtube_dl/extractor/rottentomatoes.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,16 @@ | |||||||
|  | from .videodetective import VideoDetectiveIE | ||||||
|  |  | ||||||
|  |  | ||||||
|  | # It uses the same method as videodetective.com: | ||||||
|  | # the internetvideoarchive.com URL is extracted from the og:video property | ||||||
|  | class RottenTomatoesIE(VideoDetectiveIE): | ||||||
|  |     _VALID_URL = r'https?://www\.rottentomatoes\.com/m/[^/]+/trailers/(?P<id>\d+)' | ||||||
|  |  | ||||||
|  |     _TEST = { | ||||||
|  |         u'url': u'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/', | ||||||
|  |         u'file': '613340.mp4', | ||||||
|  |         u'info_dict': { | ||||||
|  |             u'title': u'TOY STORY 3', | ||||||
|  |             u'description': u'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.', | ||||||
|  |         }, | ||||||
|  |     } | ||||||
| @@ -16,7 +16,7 @@ class VideoDetectiveIE(InfoExtractor): | |||||||
|         u'info_dict': { |         u'info_dict': { | ||||||
|             u'title': u'KICK-ASS 2', |             u'title': u'KICK-ASS 2', | ||||||
|             u'description': u'md5:65ba37ad619165afac7d432eaded6013', |             u'description': u'md5:65ba37ad619165afac7d432eaded6013', | ||||||
|             u'duration': 135, |             u'duration': 138, | ||||||
|         }, |         }, | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Jaime Marquínez Ferrándiz
					Jaime Marquínez Ferrándiz