mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	
							
								
								
									
										275
									
								
								yt_dlp/extractor/taptap.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										275
									
								
								yt_dlp/extractor/taptap.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,275 @@ | ||||
| import re | ||||
| import uuid | ||||
| 
 | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     int_or_none, | ||||
|     join_nonempty, | ||||
|     str_or_none, | ||||
|     url_or_none, | ||||
| ) | ||||
| from ..utils.traversal import traverse_obj | ||||
| 
 | ||||
| 
 | ||||
| class TapTapBaseIE(InfoExtractor): | ||||
|     _X_UA = 'V=1&PN=WebApp&LANG=zh_CN&VN_CODE=102&LOC=CN&PLT=PC&DS=Android&UID={uuid}&OS=Windows&OSV=10&DT=PC' | ||||
|     _VIDEO_API = 'https://www.taptap.cn/webapiv2/video-resource/v1/multi-get' | ||||
|     _INFO_API = None | ||||
|     _INFO_QUERY_KEY = 'id' | ||||
|     _DATA_PATH = None | ||||
|     _ID_PATH = None | ||||
|     _META_PATH = None | ||||
| 
 | ||||
|     def _get_api(self, url, video_id, query, **kwargs): | ||||
|         query = {**query, 'X-UA': self._X_UA.format(uuid=uuid.uuid4())} | ||||
|         return self._download_json(url, video_id, query=query, **kwargs)['data'] | ||||
| 
 | ||||
|     def _extract_video(self, video_id): | ||||
|         video_data = self._get_api(self._VIDEO_API, video_id, query={'video_ids': video_id})['list'][0] | ||||
| 
 | ||||
|         # h265 playlist contains both h265 and h264 formats | ||||
|         video_url = traverse_obj(video_data, ('play_url', ('url_h265', 'url'), {url_or_none}, any)) | ||||
|         formats = self._extract_m3u8_formats(video_url, video_id, fatal=False) | ||||
|         for format in formats: | ||||
|             if re.search(r'^(hev|hvc|hvt)\d', format.get('vcodec', '')): | ||||
|                 format['format_id'] = join_nonempty(format.get('format_id'), 'h265', delim='_') | ||||
| 
 | ||||
|         return { | ||||
|             'id': str(video_id), | ||||
|             'formats': formats, | ||||
|             **traverse_obj(video_data, ({ | ||||
|                 'duration': ('info', 'duration', {int_or_none}), | ||||
|                 'thumbnail': ('thumbnail', ('original_url', 'url'), {url_or_none}), | ||||
|             }), get_all=False) | ||||
|         } | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         query = {self._INFO_QUERY_KEY: video_id} | ||||
| 
 | ||||
|         data = traverse_obj( | ||||
|             self._get_api(self._INFO_API, video_id, query=query), self._DATA_PATH) | ||||
| 
 | ||||
|         metainfo = traverse_obj(data, self._META_PATH) | ||||
|         entries = [{ | ||||
|             **metainfo, | ||||
|             **self._extract_video(id) | ||||
|         } for id in set(traverse_obj(data, self._ID_PATH))] | ||||
| 
 | ||||
|         return self.playlist_result(entries, **metainfo, id=video_id) | ||||
| 
 | ||||
| 
 | ||||
| class TapTapMomentIE(TapTapBaseIE): | ||||
|     _VALID_URL = r'https?://www\.taptap\.cn/moment/(?P<id>\d+)' | ||||
|     _INFO_API = 'https://www.taptap.cn/webapiv2/moment/v3/detail' | ||||
|     _ID_PATH = ('moment', 'topic', (('videos', ...), 'pin_video'), 'video_id') | ||||
|     _META_PATH = ('moment', { | ||||
|         'timestamp': ('created_time', {int_or_none}), | ||||
|         'modified_timestamp': ('edited_time', {int_or_none}), | ||||
|         'uploader': ('author', 'user', 'name', {str}), | ||||
|         'uploader_id': ('author', 'user', 'id', {int}, {str_or_none}), | ||||
|         'title': ('topic', 'title', {str}), | ||||
|         'description': ('topic', 'summary', {str}), | ||||
|     }) | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.taptap.cn/moment/194618230982052443', | ||||
|         'info_dict': { | ||||
|             'id': '194618230982052443', | ||||
|             'title': '《崩坏3》开放世界「后崩坏书」新篇章 于淹没之地仰视辰星', | ||||
|             'description': 'md5:cf66f7819d413641b8b28c8543f4ecda', | ||||
|             'timestamp': 1633453402, | ||||
|             'upload_date': '20211005', | ||||
|             'modified_timestamp': 1633453402, | ||||
|             'modified_date': '20211005', | ||||
|             'uploader': '乌酱', | ||||
|             'uploader_id': '532896', | ||||
|         }, | ||||
|         'playlist_count': 1, | ||||
|         'playlist': [{ | ||||
|             'info_dict': { | ||||
|                 'id': '2202584', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': '《崩坏3》开放世界「后崩坏书」新篇章 于淹没之地仰视辰星', | ||||
|                 'description': 'md5:cf66f7819d413641b8b28c8543f4ecda', | ||||
|                 'duration': 66, | ||||
|                 'timestamp': 1633453402, | ||||
|                 'upload_date': '20211005', | ||||
|                 'modified_timestamp': 1633453402, | ||||
|                 'modified_date': '20211005', | ||||
|                 'uploader': '乌酱', | ||||
|                 'uploader_id': '532896', | ||||
|                 'thumbnail': r're:^https?://.*\.(png|jpg)', | ||||
|             } | ||||
|         }], | ||||
|         'params': {'skip_download': 'm3u8'}, | ||||
|     }, { | ||||
|         'url': 'https://www.taptap.cn/moment/521630629209573493', | ||||
|         'info_dict': { | ||||
|             'id': '521630629209573493', | ||||
|             'title': '《崩坏:星穹铁道》黄泉角色PV——「你的颜色」', | ||||
|             'description': 'md5:2c81245da864428c904d53ae4ad2182b', | ||||
|             'timestamp': 1711425600, | ||||
|             'upload_date': '20240326', | ||||
|             'modified_timestamp': 1711425600, | ||||
|             'modified_date': '20240326', | ||||
|             'uploader': '崩坏:星穹铁道', | ||||
|             'uploader_id': '414732580', | ||||
|         }, | ||||
|         'playlist_count': 1, | ||||
|         'playlist': [{ | ||||
|             'info_dict': { | ||||
|                 'id': '4006511', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': '《崩坏:星穹铁道》黄泉角色PV——「你的颜色」', | ||||
|                 'description': 'md5:2c81245da864428c904d53ae4ad2182b', | ||||
|                 'duration': 173, | ||||
|                 'timestamp': 1711425600, | ||||
|                 'upload_date': '20240326', | ||||
|                 'modified_timestamp': 1711425600, | ||||
|                 'modified_date': '20240326', | ||||
|                 'uploader': '崩坏:星穹铁道', | ||||
|                 'uploader_id': '414732580', | ||||
|                 'thumbnail': r're:^https?://.*\.(png|jpg)', | ||||
|             } | ||||
|         }], | ||||
|         'params': {'skip_download': 'm3u8'}, | ||||
|     }, { | ||||
|         'url': 'https://www.taptap.cn/moment/540493587511511299', | ||||
|         'playlist_count': 2, | ||||
|         'info_dict': { | ||||
|             'id': '540493587511511299', | ||||
|             'title': '中式民俗解谜《纸嫁衣7》、新系列《纸不语》公布!', | ||||
|             'description': 'md5:d60842350e686ddb242291ddfb8e39c9', | ||||
|             'timestamp': 1715920200, | ||||
|             'upload_date': '20240517', | ||||
|             'modified_timestamp': 1715942225, | ||||
|             'modified_date': '20240517', | ||||
|             'uploader': 'TapTap 编辑', | ||||
|             'uploader_id': '7159244', | ||||
|         }, | ||||
|         'params': {'skip_download': 'm3u8'}, | ||||
|     }] | ||||
| 
 | ||||
| 
 | ||||
| class TapTapAppIE(TapTapBaseIE): | ||||
|     _VALID_URL = r'https?://www\.taptap\.cn/app/(?P<id>\d+)' | ||||
|     _INFO_API = 'https://www.taptap.cn/webapiv2/app/v4/detail' | ||||
|     _ID_PATH = (('app_videos', 'videos'), ..., 'video_id') | ||||
|     _META_PATH = { | ||||
|         'title': ('title', {str}), | ||||
|         'description': ('description', 'text', {str}, {clean_html}), | ||||
|     } | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.taptap.cn/app/168332', | ||||
|         'info_dict': { | ||||
|             'id': '168332', | ||||
|             'title': '原神', | ||||
|             'description': 'md5:e345f39a5fea5de2a46923f70d5f76ab', | ||||
|         }, | ||||
|         'playlist_count': 2, | ||||
|         'playlist': [{ | ||||
|             'info_dict': { | ||||
|                 'id': '4058443', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': '原神', | ||||
|                 'description': 'md5:e345f39a5fea5de2a46923f70d5f76ab', | ||||
|                 'duration': 26, | ||||
|                 'thumbnail': r're:^https?://.*\.(png|jpg)', | ||||
|             } | ||||
|         }, { | ||||
|             'info_dict': { | ||||
|                 'id': '4058462', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': '原神', | ||||
|                 'description': 'md5:e345f39a5fea5de2a46923f70d5f76ab', | ||||
|                 'duration': 295, | ||||
|                 'thumbnail': r're:^https?://.*\.(png|jpg)', | ||||
|             } | ||||
|         }], | ||||
|         'params': {'skip_download': 'm3u8'}, | ||||
|     }] | ||||
| 
 | ||||
| 
 | ||||
| class TapTapIntlBase(TapTapBaseIE): | ||||
|     _X_UA = 'V=1&PN=WebAppIntl2&LANG=zh_TW&VN_CODE=115&VN=0.1.0&LOC=CN&PLT=PC&DS=Android&UID={uuid}&CURR=&DT=PC&OS=Windows&OSV=NT%208.0.0' | ||||
|     _VIDEO_API = 'https://www.taptap.io/webapiv2/video-resource/v1/multi-get' | ||||
| 
 | ||||
| 
 | ||||
| class TapTapAppIntlIE(TapTapIntlBase): | ||||
|     _VALID_URL = r'https?://www\.taptap\.io/app/(?P<id>\d+)' | ||||
|     _INFO_API = 'https://www.taptap.io/webapiv2/i/app/v5/detail' | ||||
|     _DATA_PATH = 'app' | ||||
|     _ID_PATH = (('app_videos', 'videos'), ..., 'video_id') | ||||
|     _META_PATH = { | ||||
|         'title': ('title', {str}), | ||||
|         'description': ('description', 'text', {str}, {clean_html}), | ||||
|     } | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.taptap.io/app/233287', | ||||
|         'info_dict': { | ||||
|             'id': '233287', | ||||
|             'title': '《虹彩六號 M》', | ||||
|             'description': 'md5:418285f9c15347fc3cf3e3a3c649f182', | ||||
|         }, | ||||
|         'playlist_count': 1, | ||||
|         'playlist': [{ | ||||
|             'info_dict': { | ||||
|                 'id': '2149708997', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': '《虹彩六號 M》', | ||||
|                 'description': 'md5:418285f9c15347fc3cf3e3a3c649f182', | ||||
|                 'duration': 78, | ||||
|                 'thumbnail': r're:^https?://.*\.(png|jpg)', | ||||
|             } | ||||
|         }], | ||||
|         'params': {'skip_download': 'm3u8'}, | ||||
|     }] | ||||
| 
 | ||||
| 
 | ||||
| class TapTapPostIntlIE(TapTapIntlBase): | ||||
|     _VALID_URL = r'https?://www\.taptap\.io/post/(?P<id>\d+)' | ||||
|     _INFO_API = 'https://www.taptap.io/webapiv2/creation/post/v1/detail' | ||||
|     _INFO_QUERY_KEY = 'id_str' | ||||
|     _DATA_PATH = 'post' | ||||
|     _ID_PATH = ((('videos', ...), 'pin_video'), 'video_id') | ||||
|     _META_PATH = { | ||||
|         'timestamp': ('published_time', {int_or_none}), | ||||
|         'modified_timestamp': ('edited_time', {int_or_none}), | ||||
|         'uploader': ('user', 'name', {str}), | ||||
|         'uploader_id': ('user', 'id', {int}, {str_or_none}), | ||||
|         'title': ('title', {str}), | ||||
|         'description': ('list_fields', 'summary', {str}), | ||||
|     } | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.taptap.io/post/571785', | ||||
|         'info_dict': { | ||||
|             'id': '571785', | ||||
|             'title': 'Arknights x Rainbow Six Siege | Event PV', | ||||
|             'description': 'md5:f7717c13f6d3108e22db7303e6690bf7', | ||||
|             'timestamp': 1614664951, | ||||
|             'upload_date': '20210302', | ||||
|             'modified_timestamp': 1614664951, | ||||
|             'modified_date': '20210302', | ||||
|             'uploader': 'TapTap Editor', | ||||
|             'uploader_id': '80224473', | ||||
|         }, | ||||
|         'playlist_count': 1, | ||||
|         'playlist': [{ | ||||
|             'info_dict': { | ||||
|                 'id': '2149491903', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Arknights x Rainbow Six Siege | Event PV', | ||||
|                 'description': 'md5:f7717c13f6d3108e22db7303e6690bf7', | ||||
|                 'duration': 122, | ||||
|                 'timestamp': 1614664951, | ||||
|                 'upload_date': '20210302', | ||||
|                 'modified_timestamp': 1614664951, | ||||
|                 'modified_date': '20210302', | ||||
|                 'uploader': 'TapTap Editor', | ||||
|                 'uploader_id': '80224473', | ||||
|                 'thumbnail': r're:^https?://.*\.(png|jpg)', | ||||
|             } | ||||
|         }], | ||||
|         'params': {'skip_download': 'm3u8'}, | ||||
|     }] | ||||
		Reference in New Issue
	
	Block a user
	 c-basalt
					c-basalt