mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-25 11:40:59 +00:00 
			
		
		
		
	[tudou] Merge into youku extractor (fixes #12214)
Also, there are no tudou playlists anymore. All playlist URLs point to youku playlists.
This commit is contained in:
		
							parent
							
								
									d3d4ba7f24
								
							
						
					
					
						commit
						c130f0a37b
					
				| @ -5,6 +5,7 @@ Core | |||||||
| * [postprocessor/ffmpeg] Fix metadata filename handling on Python 2 (#13182) | * [postprocessor/ffmpeg] Fix metadata filename handling on Python 2 (#13182) | ||||||
| 
 | 
 | ||||||
| Extractors | Extractors | ||||||
|  | * [tudou] Merge into youku extractor (#12214) | ||||||
| * [youku:show] Fix extraction | * [youku:show] Fix extraction | ||||||
| * [youku] Fix extraction (#13191) | * [youku] Fix extraction (#13191) | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -1019,11 +1019,6 @@ | |||||||
| from .trutv import TruTVIE | from .trutv import TruTVIE | ||||||
| from .tube8 import Tube8IE | from .tube8 import Tube8IE | ||||||
| from .tubitv import TubiTvIE | from .tubitv import TubiTvIE | ||||||
| from .tudou import ( |  | ||||||
|     TudouIE, |  | ||||||
|     TudouPlaylistIE, |  | ||||||
|     TudouAlbumIE, |  | ||||||
| ) |  | ||||||
| from .tumblr import TumblrIE | from .tumblr import TumblrIE | ||||||
| from .tunein import ( | from .tunein import ( | ||||||
|     TuneInClipIE, |     TuneInClipIE, | ||||||
|  | |||||||
| @ -3,138 +3,6 @@ | |||||||
| from __future__ import unicode_literals | from __future__ import unicode_literals | ||||||
| 
 | 
 | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..compat import compat_str |  | ||||||
| from ..utils import ( |  | ||||||
|     ExtractorError, |  | ||||||
|     int_or_none, |  | ||||||
|     InAdvancePagedList, |  | ||||||
|     float_or_none, |  | ||||||
|     unescapeHTML, |  | ||||||
| ) |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| class TudouIE(InfoExtractor): |  | ||||||
|     IE_NAME = 'tudou' |  | ||||||
|     _VALID_URL = r'https?://(?:www\.)?tudou\.com/(?:(?:programs|wlplay)/view|(?:listplay|albumplay)/[\w-]{11})/(?P<id>[\w-]{11})' |  | ||||||
|     _TESTS = [{ |  | ||||||
|         'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html', |  | ||||||
|         'md5': '140a49ed444bd22f93330985d8475fcb', |  | ||||||
|         'info_dict': { |  | ||||||
|             'id': '159448201', |  | ||||||
|             'ext': 'f4v', |  | ||||||
|             'title': '卡马乔国足开大脚长传冲吊集锦', |  | ||||||
|             'thumbnail': r're:^https?://.*\.jpg$', |  | ||||||
|             'timestamp': 1372113489000, |  | ||||||
|             'description': '卡马乔卡家军,开大脚先进战术不完全集锦!', |  | ||||||
|             'duration': 289.04, |  | ||||||
|             'view_count': int, |  | ||||||
|             'filesize': int, |  | ||||||
|         } |  | ||||||
|     }, { |  | ||||||
|         'url': 'http://www.tudou.com/programs/view/ajX3gyhL0pc/', |  | ||||||
|         'info_dict': { |  | ||||||
|             'id': '117049447', |  | ||||||
|             'ext': 'f4v', |  | ||||||
|             'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012', |  | ||||||
|             'thumbnail': r're:^https?://.*\.jpg$', |  | ||||||
|             'timestamp': 1349207518000, |  | ||||||
|             'description': 'md5:294612423894260f2dcd5c6c04fe248b', |  | ||||||
|             'duration': 5478.33, |  | ||||||
|             'view_count': int, |  | ||||||
|             'filesize': int, |  | ||||||
|         } |  | ||||||
|     }] |  | ||||||
| 
 |  | ||||||
|     _PLAYER_URL = 'http://js.tudouui.com/bin/lingtong/PortalPlayer_177.swf' |  | ||||||
| 
 |  | ||||||
|     # Translated from tudou/tools/TVCHelper.as in PortalPlayer_193.swf |  | ||||||
|     # 0001, 0002 and 4001 are not included as they indicate temporary issues |  | ||||||
|     TVC_ERRORS = { |  | ||||||
|         '0003': 'The video is deleted or does not exist', |  | ||||||
|         '1001': 'This video is unavailable due to licensing issues', |  | ||||||
|         '1002': 'This video is unavailable as it\'s under review', |  | ||||||
|         '1003': 'This video is unavailable as it\'s under review', |  | ||||||
|         '3001': 'Password required', |  | ||||||
|         '5001': 'This video is available in Mainland China only due to licensing issues', |  | ||||||
|         '7001': 'This video is unavailable', |  | ||||||
|         '8001': 'This video is unavailable due to licensing issues', |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     def _url_for_id(self, video_id, quality=None): |  | ||||||
|         info_url = 'http://v2.tudou.com/f?id=' + compat_str(video_id) |  | ||||||
|         if quality: |  | ||||||
|             info_url += '&hd' + quality |  | ||||||
|         xml_data = self._download_xml(info_url, video_id, 'Opening the info XML page') |  | ||||||
|         error = xml_data.attrib.get('error') |  | ||||||
|         if error is not None: |  | ||||||
|             raise ExtractorError('Tudou said: %s' % error, expected=True) |  | ||||||
|         final_url = xml_data.text |  | ||||||
|         return final_url |  | ||||||
| 
 |  | ||||||
|     def _real_extract(self, url): |  | ||||||
|         video_id = self._match_id(url) |  | ||||||
|         item_data = self._download_json( |  | ||||||
|             'http://www.tudou.com/tvp/getItemInfo.action?ic=%s' % video_id, video_id) |  | ||||||
| 
 |  | ||||||
|         youku_vcode = item_data.get('vcode') |  | ||||||
|         if youku_vcode: |  | ||||||
|             return self.url_result('youku:' + youku_vcode, ie='Youku') |  | ||||||
| 
 |  | ||||||
|         if not item_data.get('itemSegs'): |  | ||||||
|             tvc_code = item_data.get('tvcCode') |  | ||||||
|             if tvc_code: |  | ||||||
|                 err_msg = self.TVC_ERRORS.get(tvc_code) |  | ||||||
|                 if err_msg: |  | ||||||
|                     raise ExtractorError('Tudou said: %s' % err_msg, expected=True) |  | ||||||
|                 raise ExtractorError('Unexpected error %s returned from Tudou' % tvc_code) |  | ||||||
|             raise ExtractorError('Unxpected error returned from Tudou') |  | ||||||
| 
 |  | ||||||
|         title = unescapeHTML(item_data['kw']) |  | ||||||
|         description = item_data.get('desc') |  | ||||||
|         thumbnail_url = item_data.get('pic') |  | ||||||
|         view_count = int_or_none(item_data.get('playTimes')) |  | ||||||
|         timestamp = int_or_none(item_data.get('pt')) |  | ||||||
| 
 |  | ||||||
|         segments = self._parse_json(item_data['itemSegs'], video_id) |  | ||||||
|         # It looks like the keys are the arguments that have to be passed as |  | ||||||
|         # the hd field in the request url, we pick the higher |  | ||||||
|         # Also, filter non-number qualities (see issue #3643). |  | ||||||
|         quality = sorted(filter(lambda k: k.isdigit(), segments.keys()), |  | ||||||
|                          key=lambda k: int(k))[-1] |  | ||||||
|         parts = segments[quality] |  | ||||||
|         len_parts = len(parts) |  | ||||||
|         if len_parts > 1: |  | ||||||
|             self.to_screen('%s: found %s parts' % (video_id, len_parts)) |  | ||||||
| 
 |  | ||||||
|         def part_func(partnum): |  | ||||||
|             part = parts[partnum] |  | ||||||
|             part_id = part['k'] |  | ||||||
|             final_url = self._url_for_id(part_id, quality) |  | ||||||
|             ext = (final_url.split('?')[0]).split('.')[-1] |  | ||||||
|             return [{ |  | ||||||
|                 'id': '%s' % part_id, |  | ||||||
|                 'url': final_url, |  | ||||||
|                 'ext': ext, |  | ||||||
|                 'title': title, |  | ||||||
|                 'thumbnail': thumbnail_url, |  | ||||||
|                 'description': description, |  | ||||||
|                 'view_count': view_count, |  | ||||||
|                 'timestamp': timestamp, |  | ||||||
|                 'duration': float_or_none(part.get('seconds'), 1000), |  | ||||||
|                 'filesize': int_or_none(part.get('size')), |  | ||||||
|                 'http_headers': { |  | ||||||
|                     'Referer': self._PLAYER_URL, |  | ||||||
|                 }, |  | ||||||
|             }] |  | ||||||
| 
 |  | ||||||
|         entries = InAdvancePagedList(part_func, len_parts, 1) |  | ||||||
| 
 |  | ||||||
|         return { |  | ||||||
|             '_type': 'multi_video', |  | ||||||
|             'entries': entries, |  | ||||||
|             'id': video_id, |  | ||||||
|             'title': title, |  | ||||||
|         } |  | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class TudouPlaylistIE(InfoExtractor): | class TudouPlaylistIE(InfoExtractor): | ||||||
|  | |||||||
| @ -22,7 +22,9 @@ class YoukuIE(InfoExtractor): | |||||||
|     IE_DESC = '优酷' |     IE_DESC = '优酷' | ||||||
|     _VALID_URL = r'''(?x) |     _VALID_URL = r'''(?x) | ||||||
|         (?: |         (?: | ||||||
|             http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)| |             https?://( | ||||||
|  |                 (?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)| | ||||||
|  |                 video\.tudou\.com/v/)| | ||||||
|             youku:) |             youku:) | ||||||
|         (?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|) |         (?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|) | ||||||
|     ''' |     ''' | ||||||
| @ -71,6 +73,16 @@ class YoukuIE(InfoExtractor): | |||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': '我的世界☆明月庄主☆车震猎杀☆杀人艺术Minecraft', |             'title': '我的世界☆明月庄主☆车震猎杀☆杀人艺术Minecraft', | ||||||
|         }, |         }, | ||||||
|  |     }, { | ||||||
|  |         'url': 'http://video.tudou.com/v/XMjIyNzAzMTQ4NA==.html?f=46177805', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': 'XMjIyNzAzMTQ4NA', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'title': '卡马乔国足开大脚长传冲吊集锦', | ||||||
|  |         }, | ||||||
|  |     }, { | ||||||
|  |         'url': 'http://video.tudou.com/v/XMjE4ODI3OTg2MA==.html', | ||||||
|  |         'only_matching': True, | ||||||
|     }] |     }] | ||||||
| 
 | 
 | ||||||
|     @staticmethod |     @staticmethod | ||||||
| @ -107,7 +119,7 @@ def _real_extract(self, url): | |||||||
|         # request basic data |         # request basic data | ||||||
|         basic_data_params = { |         basic_data_params = { | ||||||
|             'vid': video_id, |             'vid': video_id, | ||||||
|             'ccode': '0401', |             'ccode': '0402' if 'tudou.com' in url else '0401', | ||||||
|             'client_ip': '192.168.1.1', |             'client_ip': '192.168.1.1', | ||||||
|             'utid': cna, |             'utid': cna, | ||||||
|             'client_ts': time.time() / 1000, |             'client_ts': time.time() / 1000, | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Yen Chi Hsuan
						Yen Chi Hsuan