mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	Add an extractor for rottentomatoes.com and improve InternetVideoArchiveIE to get the best quality
This commit is contained in:
		| @@ -94,6 +94,7 @@ from .rbmaradio import RBMARadioIE | ||||
| from .redtube import RedTubeIE | ||||
| from .ringtv import RingTVIE | ||||
| from .ro220 import Ro220IE | ||||
| from .rottentomatoes import RottenTomatoesIE | ||||
| from .roxwel import RoxwelIE | ||||
| from .rtlnow import RTLnowIE | ||||
| from .sina import SinaIE | ||||
|   | ||||
| @@ -4,6 +4,7 @@ import xml.etree.ElementTree | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urlparse, | ||||
|     compat_urllib_parse, | ||||
|     xpath_with_ns, | ||||
|     determine_ext, | ||||
| ) | ||||
| @@ -26,6 +27,16 @@ class InternetVideoArchiveIE(InfoExtractor): | ||||
|     def _build_url(query): | ||||
|         return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query | ||||
|  | ||||
|     @staticmethod | ||||
|     def _clean_query(query): | ||||
|         NEEDED_ARGS = ['publishedid', 'customerid'] | ||||
|         query_dic = compat_urlparse.parse_qs(query) | ||||
|         cleaned_dic = dict((k,v[0]) for (k,v) in query_dic.items() if k in NEEDED_ARGS) | ||||
|         # Other player ids return m3u8 urls | ||||
|         cleaned_dic['playerid'] = '247' | ||||
|         cleaned_dic['videokbrate'] = '100000' | ||||
|         return compat_urllib_parse.urlencode(cleaned_dic) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         query = compat_urlparse.urlparse(url).query | ||||
|         query_dic = compat_urlparse.parse_qs(query) | ||||
| @@ -37,6 +48,11 @@ class InternetVideoArchiveIE(InfoExtractor): | ||||
|         flashconfiguration = xml.etree.ElementTree.fromstring(flashconfiguration_xml.encode('utf-8')) | ||||
|         file_url = flashconfiguration.find('file').text | ||||
|         file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx') | ||||
|         # Replace some of the parameters in the query to get the best quality | ||||
|         # and http links (no m3u8 manifests) | ||||
|         file_url = re.sub(r'(?<=\?)(.+)$', | ||||
|             lambda m: self._clean_query(m.group()), | ||||
|             file_url) | ||||
|         info_xml = self._download_webpage(file_url, video_id, | ||||
|             u'Downloading video info') | ||||
|         info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8')) | ||||
|   | ||||
							
								
								
									
										16
									
								
								youtube_dl/extractor/rottentomatoes.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								youtube_dl/extractor/rottentomatoes.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,16 @@ | ||||
| from .videodetective import VideoDetectiveIE | ||||
|  | ||||
|  | ||||
| # It uses the same method as videodetective.com: | ||||
| # the internetvideoarchive.com URL is extracted from the og:video property | ||||
| class RottenTomatoesIE(VideoDetectiveIE): | ||||
|     _VALID_URL = r'https?://www\.rottentomatoes\.com/m/[^/]+/trailers/(?P<id>\d+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/', | ||||
|         u'file': '613340.mp4', | ||||
|         u'info_dict': { | ||||
|             u'title': u'TOY STORY 3', | ||||
|             u'description': u'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.', | ||||
|         }, | ||||
|     } | ||||
| @@ -16,7 +16,7 @@ class VideoDetectiveIE(InfoExtractor): | ||||
|         u'info_dict': { | ||||
|             u'title': u'KICK-ASS 2', | ||||
|             u'description': u'md5:65ba37ad619165afac7d432eaded6013', | ||||
|             u'duration': 135, | ||||
|             u'duration': 138, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Jaime Marquínez Ferrándiz
					Jaime Marquínez Ferrándiz