mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-26 04:00:57 +00:00 
			
		
		
		
	[disney] improve extraction
- add support for more urls
- detect expired videos
- skip Adobe Flash Access protected videos

closes #4975
closes #11000
closes #11882
closes #11936
This commit is contained in:
		
							parent
							
								
									0dac7cbb09
								
							
						
					
					
						commit
						9dad941853
					
				| @ -9,13 +9,15 @@ | |||||||
|     unified_strdate, |     unified_strdate, | ||||||
|     compat_str, |     compat_str, | ||||||
|     determine_ext, |     determine_ext, | ||||||
|  |     ExtractorError, | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class DisneyIE(InfoExtractor): | class DisneyIE(InfoExtractor): | ||||||
|     _VALID_URL = r'''(?x) |     _VALID_URL = r'''(?x) | ||||||
|         https?://(?P<domain>(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr)|starwars\.com))/(?:embed/|(?:[^/]+/)+[\w-]+-)(?P<id>[a-z0-9]{24})''' |         https?://(?P<domain>(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr)|(?:starwars|marvelkids)\.com))/(?:(?:embed/|(?:[^/]+/)+[\w-]+-)(?P<id>[a-z0-9]{24})|(?:[^/]+/)?(?P<display_id>[^/?#]+))''' | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|  |         # Disney.EmbedVideo | ||||||
|         'url': 'http://video.disney.com/watch/moana-trailer-545ed1857afee5a0ec239977', |         'url': 'http://video.disney.com/watch/moana-trailer-545ed1857afee5a0ec239977', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '545ed1857afee5a0ec239977', |             'id': '545ed1857afee5a0ec239977', | ||||||
| @ -28,6 +30,20 @@ class DisneyIE(InfoExtractor): | |||||||
|             # m3u8 download |             # m3u8 download | ||||||
|             'skip_download': True, |             'skip_download': True, | ||||||
|         } |         } | ||||||
|  |     }, { | ||||||
|  |         # Grill.burger | ||||||
|  |         'url': 'http://www.starwars.com/video/rogue-one-a-star-wars-story-intro-featurette', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '5454e9f4e9804a552e3524c8', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'title': '"Intro" Featurette: Rogue One: A Star Wars Story', | ||||||
|  |             'upload_date': '20170104', | ||||||
|  |             'description': 'Go behind-the-scenes of Rogue One: A Star Wars Story in this featurette with Director Gareth Edwards and the cast of the film.', | ||||||
|  |         }, | ||||||
|  |         'params': { | ||||||
|  |             # m3u8 download | ||||||
|  |             'skip_download': True, | ||||||
|  |         } | ||||||
|     }, { |     }, { | ||||||
|         'url': 'http://videos.disneylatino.com/ver/spider-man-de-regreso-a-casa-primer-adelanto-543a33a1850bdcfcca13bae2', |         'url': 'http://videos.disneylatino.com/ver/spider-man-de-regreso-a-casa-primer-adelanto-543a33a1850bdcfcca13bae2', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
| @ -43,31 +59,55 @@ class DisneyIE(InfoExtractor): | |||||||
|     }, { |     }, { | ||||||
|         'url': 'http://www.starwars.com/embed/54690d1e6c42e5f09a0fb097', |         'url': 'http://www.starwars.com/embed/54690d1e6c42e5f09a0fb097', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|  |     }, { | ||||||
|  |         'url': 'http://spiderman.marvelkids.com/embed/522900d2ced3c565e4cc0677', | ||||||
|  |         'only_matching': True, | ||||||
|  |     }, { | ||||||
|  |         'url': 'http://spiderman.marvelkids.com/videos/contest-of-champions-part-four-clip-1', | ||||||
|  |         'only_matching': True, | ||||||
|  |     }, { | ||||||
|  |         'url': 'http://disneyjunior.en.disneyme.com/dj/watch-my-friends-tigger-and-pooh-promo', | ||||||
|  |         'only_matching': True, | ||||||
|  |     }, { | ||||||
|  |         'url': 'http://disneyjunior.disney.com/galactech-the-galactech-grab-galactech-an-admiral-rescue', | ||||||
|  |         'only_matching': True, | ||||||
|     }] |     }] | ||||||
| 
 | 
 | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         domain, video_id = re.match(self._VALID_URL, url).groups() |         domain, video_id, display_id = re.match(self._VALID_URL, url).groups() | ||||||
|         webpage = self._download_webpage( |         if not video_id: | ||||||
|             'http://%s/embed/%s' % (domain, video_id), video_id) |             webpage = self._download_webpage(url, display_id) | ||||||
|         video_data = self._parse_json(self._search_regex( |             grill = re.sub(r'"\s*\+\s*"', '', self._search_regex( | ||||||
|             r'Disney\.EmbedVideo=({.+});', webpage, 'embed data'), video_id)['video'] |                 r'Grill\.burger\s*=\s*({.+})\s*:', | ||||||
|  |                 webpage, 'grill data')) | ||||||
|  |             page_data = next(s for s in self._parse_json(grill, display_id)['stack'] if s.get('type') == 'video') | ||||||
|  |             video_data = page_data['data'][0] | ||||||
|  |         else: | ||||||
|  |             webpage = self._download_webpage( | ||||||
|  |                 'http://%s/embed/%s' % (domain, video_id), video_id) | ||||||
|  |             page_data = self._parse_json(self._search_regex( | ||||||
|  |                 r'Disney\.EmbedVideo\s*=\s*({.+});', | ||||||
|  |                 webpage, 'embed data'), video_id) | ||||||
|  |             video_data = page_data['video'] | ||||||
| 
 | 
 | ||||||
|         for external in video_data.get('externals', []): |         for external in video_data.get('externals', []): | ||||||
|             if external.get('source') == 'vevo': |             if external.get('source') == 'vevo': | ||||||
|                 return self.url_result('vevo:' + external['data_id'], 'Vevo') |                 return self.url_result('vevo:' + external['data_id'], 'Vevo') | ||||||
| 
 | 
 | ||||||
|  |         video_id = video_data['id'] | ||||||
|         title = video_data['title'] |         title = video_data['title'] | ||||||
| 
 | 
 | ||||||
|         formats = [] |         formats = [] | ||||||
|         for flavor in video_data.get('flavors', []): |         for flavor in video_data.get('flavors', []): | ||||||
|             flavor_format = flavor.get('format') |             flavor_format = flavor.get('format') | ||||||
|             flavor_url = flavor.get('url') |             flavor_url = flavor.get('url') | ||||||
|             if not flavor_url or not re.match(r'https?://', flavor_url): |             if not flavor_url or not re.match(r'https?://', flavor_url) or flavor_format == 'mp4_access': | ||||||
|                 continue |                 continue | ||||||
|             tbr = int_or_none(flavor.get('bitrate')) |             tbr = int_or_none(flavor.get('bitrate')) | ||||||
|             if tbr == 99999: |             if tbr == 99999: | ||||||
|                 formats.extend(self._extract_m3u8_formats( |                 formats.extend(self._extract_m3u8_formats( | ||||||
|                     flavor_url, video_id, 'mp4', m3u8_id=flavor_format, fatal=False)) |                     flavor_url, video_id, 'mp4', | ||||||
|  |                     m3u8_id=flavor_format, fatal=False)) | ||||||
|                 continue |                 continue | ||||||
|             format_id = [] |             format_id = [] | ||||||
|             if flavor_format: |             if flavor_format: | ||||||
| @ -88,6 +128,10 @@ def _real_extract(self, url): | |||||||
|                 'ext': ext, |                 'ext': ext, | ||||||
|                 'vcodec': 'none' if (width == 0 and height == 0) else None, |                 'vcodec': 'none' if (width == 0 and height == 0) else None, | ||||||
|             }) |             }) | ||||||
|  |         if not formats and video_data.get('expired'): | ||||||
|  |             raise ExtractorError( | ||||||
|  |                 '%s said: %s' % (self.IE_NAME, page_data['translations']['video_expired']), | ||||||
|  |                 expected=True) | ||||||
|         self._sort_formats(formats) |         self._sort_formats(formats) | ||||||
| 
 | 
 | ||||||
|         subtitles = {} |         subtitles = {} | ||||||
|  | |||||||
| @ -991,19 +991,6 @@ class GenericIE(InfoExtractor): | |||||||
|                 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014', |                 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014', | ||||||
|             }, |             }, | ||||||
|         }, |         }, | ||||||
|         # Kaltura embed protected with referrer |  | ||||||
|         { |  | ||||||
|             'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero', |  | ||||||
|             'info_dict': { |  | ||||||
|                 'id': '1_g4fbemnq', |  | ||||||
|                 'ext': 'mp4', |  | ||||||
|                 'title': 'Violetta - Achter De Schermen - Ruggero', |  | ||||||
|                 'description': 'Achter de schermen met Ruggero', |  | ||||||
|                 'timestamp': 1435133761, |  | ||||||
|                 'upload_date': '20150624', |  | ||||||
|                 'uploader_id': 'echojecka', |  | ||||||
|             }, |  | ||||||
|         }, |  | ||||||
|         # Kaltura embed with single quotes |         # Kaltura embed with single quotes | ||||||
|         { |         { | ||||||
|             'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY', |             'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY', | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Remita Amine
						Remita Amine