Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2025-10-31 14:45:14 +00:00)
			
		
		
		
	[kakao] improve extraction
- support embed URLs
- support Kakao legacy vid-based embed URLs
- only request the fields used for extraction
- strip description and extract tags
This commit is contained in:
		| @@ -6,14 +6,15 @@ from .common import InfoExtractor | |||||||
| from ..compat import compat_str | from ..compat import compat_str | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     int_or_none, |     int_or_none, | ||||||
|  |     strip_or_none, | ||||||
|     unified_timestamp, |     unified_timestamp, | ||||||
|     update_url_query, |     update_url_query, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| class KakaoIE(InfoExtractor): | class KakaoIE(InfoExtractor): | ||||||
|     _VALID_URL = r'https?://tv\.kakao\.com/channel/(?P<channel>\d+)/cliplink/(?P<id>\d+)' |     _VALID_URL = r'https?://(?:play-)?tv\.kakao\.com/(?:channel/\d+|embed/player)/cliplink/(?P<id>\d+|[^?#&]+@my)' | ||||||
|     _API_BASE = 'http://tv.kakao.com/api/v1/ft/cliplinks' |     _API_BASE_TMPL = 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/' | ||||||
|  |  | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'http://tv.kakao.com/channel/2671005/cliplink/301965083', |         'url': 'http://tv.kakao.com/channel/2671005/cliplink/301965083', | ||||||
| @@ -36,7 +37,7 @@ class KakaoIE(InfoExtractor): | |||||||
|             'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회', |             'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회', | ||||||
|             'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)', |             'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)', | ||||||
|             'uploader_id': 2653210, |             'uploader_id': 2653210, | ||||||
|             'uploader': '쇼 음악중심', |             'uploader': '쇼! 음악중심', | ||||||
|             'timestamp': 1485684628, |             'timestamp': 1485684628, | ||||||
|             'upload_date': '20170129', |             'upload_date': '20170129', | ||||||
|         } |         } | ||||||
| @@ -44,6 +45,8 @@ class KakaoIE(InfoExtractor): | |||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         video_id = self._match_id(url) |         video_id = self._match_id(url) | ||||||
|  |         display_id = video_id.rstrip('@my') | ||||||
|  |         api_base = self._API_BASE_TMPL % video_id | ||||||
|  |  | ||||||
|         player_header = { |         player_header = { | ||||||
|             'Referer': update_url_query( |             'Referer': update_url_query( | ||||||
| @@ -55,20 +58,22 @@ class KakaoIE(InfoExtractor): | |||||||
|                 }) |                 }) | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         QUERY_COMMON = { |         query = { | ||||||
|             'player': 'monet_html5', |             'player': 'monet_html5', | ||||||
|             'referer': url, |             'referer': url, | ||||||
|             'uuid': '', |             'uuid': '', | ||||||
|             'service': 'kakao_tv', |             'service': 'kakao_tv', | ||||||
|             'section': '', |             'section': '', | ||||||
|             'dteType': 'PC', |             'dteType': 'PC', | ||||||
|  |             'fields': ','.join([ | ||||||
|  |                 '-*', 'tid', 'clipLink', 'displayTitle', 'clip', 'title', | ||||||
|  |                 'description', 'channelId', 'createTime', 'duration', 'playCount', | ||||||
|  |                 'likeCount', 'commentCount', 'tagList', 'channel', 'name', | ||||||
|  |                 'clipChapterThumbnailList', 'thumbnailUrl', 'timeInSec', 'isDefault']) | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         query = QUERY_COMMON.copy() |  | ||||||
|         query['fields'] = 'clipLink,clip,channel,hasPlusFriend,-service,-tagList' |  | ||||||
|         impress = self._download_json( |         impress = self._download_json( | ||||||
|             '%s/%s/impress' % (self._API_BASE, video_id), |             api_base + 'impress', display_id, 'Downloading video info', | ||||||
|             video_id, 'Downloading video info', |  | ||||||
|             query=query, headers=player_header) |             query=query, headers=player_header) | ||||||
|  |  | ||||||
|         clip_link = impress['clipLink'] |         clip_link = impress['clipLink'] | ||||||
| @@ -78,30 +83,27 @@ class KakaoIE(InfoExtractor): | |||||||
|  |  | ||||||
|         tid = impress.get('tid', '') |         tid = impress.get('tid', '') | ||||||
|  |  | ||||||
|         query = QUERY_COMMON.copy() |  | ||||||
|         query.update({ |         query.update({ | ||||||
|  |             'fields': '-*,outputList,profile,width,height,label,filesize', | ||||||
|             'tid': tid, |             'tid': tid, | ||||||
|             'profile': 'HIGH', |             'profile': 'HIGH', | ||||||
|         }) |         }) | ||||||
|         raw = self._download_json( |         raw = self._download_json( | ||||||
|             '%s/%s/raw' % (self._API_BASE, video_id), |             api_base + 'raw', display_id, 'Downloading video formats info', | ||||||
|             video_id, 'Downloading video formats info', |  | ||||||
|             query=query, headers=player_header) |             query=query, headers=player_header) | ||||||
|  |  | ||||||
|         formats = [] |         formats = [] | ||||||
|         for fmt in raw.get('outputList', []): |         for fmt in raw.get('outputList', []): | ||||||
|             try: |             try: | ||||||
|                 profile_name = fmt['profile'] |                 profile_name = fmt['profile'] | ||||||
|  |                 query.update({ | ||||||
|  |                     'profile': profile_name, | ||||||
|  |                     'fields': '-*,url', | ||||||
|  |                 }) | ||||||
|                 fmt_url_json = self._download_json( |                 fmt_url_json = self._download_json( | ||||||
|                     '%s/%s/raw/videolocation' % (self._API_BASE, video_id), |                     api_base + 'raw/videolocation', display_id, | ||||||
|                     video_id, |  | ||||||
|                     'Downloading video URL for profile %s' % profile_name, |                     'Downloading video URL for profile %s' % profile_name, | ||||||
|                     query={ |                     query=query, headers=player_header, fatal=False) | ||||||
|                         'service': 'kakao_tv', |  | ||||||
|                         'section': '', |  | ||||||
|                         'tid': tid, |  | ||||||
|                         'profile': profile_name |  | ||||||
|                     }, headers=player_header, fatal=False) |  | ||||||
|  |  | ||||||
|                 if fmt_url_json is None: |                 if fmt_url_json is None: | ||||||
|                     continue |                     continue | ||||||
| @@ -134,9 +136,9 @@ class KakaoIE(InfoExtractor): | |||||||
|             }) |             }) | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
|             'id': video_id, |             'id': display_id, | ||||||
|             'title': title, |             'title': title, | ||||||
|             'description': clip.get('description'), |             'description': strip_or_none(clip.get('description')), | ||||||
|             'uploader': clip_link.get('channel', {}).get('name'), |             'uploader': clip_link.get('channel', {}).get('name'), | ||||||
|             'uploader_id': clip_link.get('channelId'), |             'uploader_id': clip_link.get('channelId'), | ||||||
|             'thumbnails': thumbs, |             'thumbnails': thumbs, | ||||||
| @@ -146,4 +148,5 @@ class KakaoIE(InfoExtractor): | |||||||
|             'like_count': int_or_none(clip.get('likeCount')), |             'like_count': int_or_none(clip.get('likeCount')), | ||||||
|             'comment_count': int_or_none(clip.get('commentCount')), |             'comment_count': int_or_none(clip.get('commentCount')), | ||||||
|             'formats': formats, |             'formats': formats, | ||||||
|  |             'tags': clip.get('tagList'), | ||||||
|         } |         } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Remita Amine
					Remita Amine