mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-30 22:25:19 +00:00 
			
		
		
		
	[kinja] add support for Kinja embeds
closes #5756 closes #11282 closes #22237 closes #22384
This commit is contained in:
		| @@ -513,6 +513,7 @@ from .keezmovies import KeezMoviesIE | ||||
| from .ketnet import KetnetIE | ||||
| from .khanacademy import KhanAcademyIE | ||||
| from .kickstarter import KickStarterIE | ||||
| from .kinja import KinjaEmbedIE | ||||
| from .kinopoisk import KinoPoiskIE | ||||
| from .konserthusetplay import KonserthusetPlayIE | ||||
| from .kontrtube import KontrTubeIE | ||||
|   | ||||
| @@ -119,6 +119,7 @@ from .viqeo import ViqeoIE | ||||
| from .expressen import ExpressenIE | ||||
| from .zype import ZypeIE | ||||
| from .odnoklassniki import OdnoklassnikiIE | ||||
| from .kinja import KinjaEmbedIE | ||||
|  | ||||
|  | ||||
| class GenericIE(InfoExtractor): | ||||
| @@ -1487,16 +1488,18 @@ class GenericIE(InfoExtractor): | ||||
|                 'timestamp': 1432570283, | ||||
|             }, | ||||
|         }, | ||||
|         # OnionStudios embed | ||||
|         # Kinja embed | ||||
|         { | ||||
|             'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537', | ||||
|             'info_dict': { | ||||
|                 'id': '2855', | ||||
|                 'id': '106351', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You', | ||||
|                 'description': 'Migrated from OnionStudios', | ||||
|                 'thumbnail': r're:^https?://.*\.jpe?g$', | ||||
|                 'uploader': 'ClickHole', | ||||
|                 'uploader_id': 'clickhole', | ||||
|                 'uploader': 'clickhole', | ||||
|                 'upload_date': '20150527', | ||||
|                 'timestamp': 1432744860, | ||||
|             } | ||||
|         }, | ||||
|         # SnagFilms embed | ||||
| @@ -2894,6 +2897,12 @@ class GenericIE(InfoExtractor): | ||||
|         if senate_isvp_url: | ||||
|             return self.url_result(senate_isvp_url, 'SenateISVP') | ||||
|  | ||||
|         # Look for Kinja embeds | ||||
|         kinja_embed_urls = KinjaEmbedIE._extract_urls(webpage, url) | ||||
|         if kinja_embed_urls: | ||||
|             return self.playlist_from_matches( | ||||
|                 kinja_embed_urls, video_id, video_title) | ||||
|  | ||||
|         # Look for OnionStudios embeds | ||||
|         onionstudios_url = OnionStudiosIE._extract_url(webpage) | ||||
|         if onionstudios_url: | ||||
|   | ||||
							
								
								
									
										221
									
								
								youtube_dl/extractor/kinja.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										221
									
								
								youtube_dl/extractor/kinja.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,221 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_str, | ||||
|     compat_urllib_parse_unquote, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
|     strip_or_none, | ||||
|     try_get, | ||||
|     unescapeHTML, | ||||
|     urljoin, | ||||
| ) | ||||
|  | ||||
|  | ||||
class KinjaEmbedIE(InfoExtractor):
    """Extractor for media embedded through Kinja iframe URLs.

    Matches ``/ajax/inset/iframe`` and ``/embed/video/iframe`` URLs on the
    domains listed in ``_DOMAIN_REGEX``. The ``id`` query parameter has the
    form ``<type>-<id>``: types listed in ``_PROVIDER_MAP`` are delegated to
    the named extractor, while ``kinjavideo`` and ``mcp`` are resolved here.
    """

    # The base extractor reads its display name from ``IE_NAME``; the former
    # spelling ``IENAME`` was never picked up.
    IE_NAME = 'kinja:embed'
    # Verbose-mode fragment: optional subdomain followed by a known Kinja site.
    _DOMAIN_REGEX = r'''(?:[^.]+\.)?
        (?:
            avclub|
            clickhole|
            deadspin|
            gizmodo|
            jalopnik|
            jezebel|
            kinja|
            kotaku|
            lifehacker|
            splinternews|
            the(?:inventory|onion|root|takeout)
        )\.com'''
    # Verbose-mode fragment: iframe path up to (and including) ``id=``.
    _COMMON_REGEX = r'''/
        (?:
            ajax/inset|
            embed/video
        )/iframe\?.*?\bid='''
    _VALID_URL = r'''(?x)https?://%s%s
        (?P<type>
            fb|
            imgur|
            instagram|
            jwp(?:layer)?-video|
            kinjavideo|
            mcp|
            megaphone|
            ooyala|
            soundcloud(?:-playlist)?|
            tumblr-post|
            twitch-stream|
            twitter|
            ustream-channel|
            vimeo|
            vine|
            youtube-(?:list|video)
        )-(?P<id>[^&]+)''' % (_DOMAIN_REGEX, _COMMON_REGEX)
    _TESTS = [{
        'url': 'https://kinja.com/ajax/inset/iframe?id=fb-10103303356633621',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=kinjavideo-100313',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=megaphone-PPY1300931075',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=ooyala-xzMXhleDpopuT0u1ijt_qZj3Va-34pEX%2FZTIxYmJjZDM2NWYzZDViZGRiOWJjYzc5',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-128574047',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-playlist-317413750',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=tumblr-post-160130699814-daydreams-at-midnight',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=twitch-stream-libratus_extra',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=twitter-1068875942473404422',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=ustream-channel-10414700',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=vimeo-120153502',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=vine-5BlvV5qqPrD',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-list-BCQ3KyrPjgA/PLE6509247C270A72E',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-video-00QyL0AgPAE',
        'only_matching': True,
    }]
    # Both JW Player embed type spellings resolve to the same target.
    _JWPLATFORM_PROVIDER = ('cdn.jwplayer.com/v2/media/', 'JWPlatform')
    # embed type -> (URL prefix or %-template without scheme, extractor key)
    _PROVIDER_MAP = {
        'fb': ('facebook.com/video.php?v=', 'Facebook'),
        'imgur': ('imgur.com/', 'Imgur'),
        'instagram': ('instagram.com/p/', 'Instagram'),
        'jwplayer-video': _JWPLATFORM_PROVIDER,
        'jwp-video': _JWPLATFORM_PROVIDER,
        'megaphone': ('player.megaphone.fm/', 'Generic'),
        'ooyala': ('player.ooyala.com/player.js?embedCode=', 'Ooyala'),
        'soundcloud': ('api.soundcloud.com/tracks/', 'Soundcloud'),
        'soundcloud-playlist': ('api.soundcloud.com/playlists/', 'SoundcloudPlaylist'),
        'tumblr-post': ('%s.tumblr.com/post/%s', 'Tumblr'),
        'twitch-stream': ('twitch.tv/', 'TwitchStream'),
        'twitter': ('twitter.com/i/cards/tfw/v1/', 'TwitterCard'),
        'ustream-channel': ('ustream.tv/embed/', 'Ustream'),
        'vimeo': ('vimeo.com/', 'Vimeo'),
        'vine': ('vine.co/v/', 'Vine'),
        'youtube-list': ('youtube.com/embed/%s?list=%s', 'YoutubePlaylist'),
        'youtube-video': ('youtube.com/embed/', 'Youtube'),
    }

    @staticmethod
    def _extract_urls(webpage, url):
        """Return absolute URLs of all Kinja embed iframes found in *webpage*.

        The iframe ``src`` may be absolute, protocol-relative or host-relative;
        each match is HTML-unescaped and resolved against *url*.
        """
        # ``\1`` refers to the opening quote captured by the ``q`` group.
        return [urljoin(url, unescapeHTML(mobj.group('url'))) for mobj in re.finditer(
            r'(?x)<iframe[^>]+?src=(?P<q>["\'])(?P<url>(?:(?:https?:)?//%s)?%s(?:(?!\1).)+)\1' % (KinjaEmbedIE._DOMAIN_REGEX, KinjaEmbedIE._COMMON_REGEX),
            webpage)]

    def _real_extract(self, url):
        """Dispatch on the embed type encoded in the ``id`` query parameter."""
        video_type, video_id = re.match(self._VALID_URL, url).groups()

        provider = self._PROVIDER_MAP.get(video_type)
        if provider:
            video_id = compat_urllib_parse_unquote(video_id)
            if video_type == 'tumblr-post':
                # id is "<post id>-<blog name>"; the blog becomes the subdomain
                video_id, blog = video_id.split('-', 1)
                result_url = provider[0] % (blog, video_id)
            elif video_type == 'youtube-list':
                # id is "<video id>/<playlist id>"
                video_id, playlist_id = video_id.split('/')
                result_url = provider[0] % (video_id, playlist_id)
            else:
                if video_type == 'ooyala':
                    # only the part before the first slash is the embed code
                    video_id = video_id.split('/')[0]
                result_url = provider[0] + video_id
            return self.url_result('http://' + result_url, provider[1])

        if video_type == 'kinjavideo':
            return self._extract_kinja_video(video_id)
        # The only remaining type accepted by _VALID_URL is 'mcp'.
        return self._extract_mcp_video(video_id)

    def _extract_kinja_video(self, video_id):
        """Build the info dict for a ``kinjavideo`` embed from the Kinja video API."""
        data = self._download_json(
            'https://kinja.com/api/core/video/views/videoById',
            video_id, query={'videoId': video_id})['data']
        title = data['title']

        formats = []
        for k in ('signedPlaylist', 'streaming'):
            m3u8_url = data.get(k + 'Url')
            if m3u8_url:
                formats.extend(self._extract_m3u8_formats(
                    m3u8_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
        self._sort_formats(formats)

        thumbnail = None
        poster = data.get('poster') or {}
        poster_id = poster.get('id')
        if poster_id:
            thumbnail = 'https://i.kinja-img.com/gawker-media/image/upload/%s.%s' % (poster_id, poster.get('format') or 'jpg')

        return {
            'id': video_id,
            'title': title,
            'description': strip_or_none(data.get('description')),
            'formats': formats,
            'tags': data.get('tags'),
            # publishTimeMillis is scaled down by 1000 to get seconds
            'timestamp': int_or_none(try_get(
                data, lambda x: x['postInfo']['publishTimeMillis']), 1000),
            'thumbnail': thumbnail,
            'uploader': data.get('network'),
        }

    def _extract_mcp_video(self, video_id):
        """Build the info dict for an ``mcp`` embed from the Univision APIs."""
        video_data = self._download_json(
            'https://api.vmh.univision.com/metadata/v1/content/' + video_id,
            video_id)['videoMetadata']
        iptc = video_data['photoVideoMetadataIPTC']
        title = iptc['title']['en']
        fmg = video_data.get('photoVideoMetadata_fmg') or {}
        # Fall back to the default auth host when the metadata names none.
        tvss_domain = fmg.get('tvssDomain') or 'https://auth.univision.com'
        data = self._download_json(
            tvss_domain + '/api/v3/video-auth/url-signature-tokens',
            video_id, query={'mcpids': video_id})['data'][0]
        formats = []

        rendition_url = data.get('renditionUrl')
        if rendition_url:
            formats = self._extract_m3u8_formats(
                rendition_url, video_id, 'mp4',
                'm3u8_native', m3u8_id='hls', fatal=False)

        fallback_rendition_url = data.get('fallbackRenditionUrl')
        if fallback_rendition_url:
            formats.append({
                'format_id': 'fallback',
                # bitrate is taken from the "_<digits>.mp4" suffix if present
                'tbr': int_or_none(self._search_regex(
                    r'_(\d+)\.mp4', fallback_rendition_url,
                    'bitrate', default=None)),
                'url': fallback_rendition_url,
            })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': try_get(iptc, lambda x: x['cloudinaryLink']['link'], compat_str),
            'uploader': fmg.get('network'),
            'duration': int_or_none(iptc.get('fileDuration')),
            'formats': formats,
            'description': try_get(iptc, lambda x: x['description']['en'], compat_str),
            'timestamp': parse_iso8601(iptc.get('dateReleased')),
        }
| @@ -4,13 +4,8 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_str, | ||||
|     int_or_none, | ||||
|     js_to_json, | ||||
|     parse_iso8601, | ||||
|     try_get, | ||||
| ) | ||||
| from ..compat import compat_str | ||||
| from ..utils import js_to_json | ||||
|  | ||||
|  | ||||
| class OnionStudiosIE(InfoExtractor): | ||||
| @@ -20,7 +15,7 @@ class OnionStudiosIE(InfoExtractor): | ||||
|         'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937', | ||||
|         'md5': '5a118d466d62b5cd03647cf2c593977f', | ||||
|         'info_dict': { | ||||
|             'id': '2937', | ||||
|             'id': '3459881', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Hannibal charges forward, stops for a cocktail', | ||||
|             'description': 'md5:545299bda6abf87e5ec666548c6a9448', | ||||
| @@ -53,43 +48,6 @@ class OnionStudiosIE(InfoExtractor): | ||||
|         mcp_id = compat_str(self._parse_json(self._search_regex( | ||||
|             r'window\.mcpMapping\s*=\s*({.+?});', webpage, | ||||
|             'MCP Mapping'), video_id, js_to_json)[video_id]['mcp_id']) | ||||
|         video_data = self._download_json( | ||||
|             'https://api.vmh.univision.com/metadata/v1/content/' + mcp_id, | ||||
|             mcp_id)['videoMetadata'] | ||||
|         iptc = video_data['photoVideoMetadataIPTC'] | ||||
|         title = iptc['title']['en'] | ||||
|         fmg = video_data.get('photoVideoMetadata_fmg') or {} | ||||
|         tvss_domain = fmg.get('tvssDomain') or 'https://auth.univision.com' | ||||
|         data = self._download_json( | ||||
|             tvss_domain + '/api/v3/video-auth/url-signature-tokens', | ||||
|             mcp_id, query={'mcpids': mcp_id})['data'][0] | ||||
|         formats = [] | ||||
|  | ||||
|         rendition_url = data.get('renditionUrl') | ||||
|         if rendition_url: | ||||
|             formats = self._extract_m3u8_formats( | ||||
|                 rendition_url, mcp_id, 'mp4', | ||||
|                 'm3u8_native', m3u8_id='hls', fatal=False) | ||||
|  | ||||
|         fallback_rendition_url = data.get('fallbackRenditionUrl') | ||||
|         if fallback_rendition_url: | ||||
|             formats.append({ | ||||
|                 'format_id': 'fallback', | ||||
|                 'tbr': int_or_none(self._search_regex( | ||||
|                     r'_(\d+)\.mp4', fallback_rendition_url, | ||||
|                     'bitrate', default=None)), | ||||
|                 'url': fallback_rendition_url, | ||||
|             }) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'thumbnail': try_get(iptc, lambda x: x['cloudinaryLink']['link'], compat_str), | ||||
|             'uploader': fmg.get('network'), | ||||
|             'duration': int_or_none(iptc.get('fileDuration')), | ||||
|             'formats': formats, | ||||
|             'description': try_get(iptc, lambda x: x['description']['en'], compat_str), | ||||
|             'timestamp': parse_iso8601(iptc.get('dateReleased')), | ||||
|         } | ||||
|         return self.url_result( | ||||
|             'http://kinja.com/ajax/inset/iframe?id=mcp-' + mcp_id, | ||||
|             'KinjaEmbed', mcp_id) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Remita Amine
					Remita Amine