mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	[TikTok] Fix metadata extraction
This commit is contained in:
		| @@ -11,125 +11,102 @@ from ..utils import ( | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TikTokBaseIE(InfoExtractor): | ||||
|     def _extract_aweme(self, props_data, webpage, url): | ||||
|         video_data = try_get(props_data, lambda x: x['pageProps'], expected_type=dict) | ||||
|         video_info = try_get( | ||||
|             video_data, lambda x: x['itemInfo']['itemStruct'], dict) | ||||
|         author_info = try_get( | ||||
|             video_data, lambda x: x['itemInfo']['itemStruct']['author'], dict) or {} | ||||
|         share_info = try_get(video_data, lambda x: x['itemInfo']['shareMeta'], dict) or {} | ||||
|  | ||||
|         unique_id = str_or_none(author_info.get('uniqueId')) | ||||
|         timestamp = try_get(video_info, lambda x: int(x['createTime']), int) | ||||
|         date = datetime.fromtimestamp(timestamp).strftime('%Y%m%d') | ||||
|  | ||||
|         height = try_get(video_info, lambda x: x['video']['height'], int) | ||||
|         width = try_get(video_info, lambda x: x['video']['width'], int) | ||||
|         thumbnails = [] | ||||
|         thumbnails.append({ | ||||
|             'url': video_info.get('thumbnail') or self._og_search_thumbnail(webpage), | ||||
|             'width': width, | ||||
|             'height': height | ||||
|         }) | ||||
|  | ||||
|         url = '' | ||||
|         if not url: | ||||
|             url = try_get(video_info, lambda x: x['video']['playAddr']) | ||||
|         if not url: | ||||
|             url = try_get(video_info, lambda x: x['video']['downloadAddr']) | ||||
|         formats = [] | ||||
|         formats.append({ | ||||
|             'url': url, | ||||
|             'ext': 'mp4', | ||||
|             'height': height, | ||||
|             'width': width | ||||
|         }) | ||||
|  | ||||
|         tracker = try_get(props_data, lambda x: x['initialProps']['$wid']) | ||||
|         return { | ||||
|             'comment_count': int_or_none(video_info.get('commentCount')), | ||||
|             'duration': try_get(video_info, lambda x: x['video']['videoMeta']['duration'], int), | ||||
|             'height': height, | ||||
|             'id': str_or_none(video_info.get('id')), | ||||
|             'like_count': int_or_none(video_info.get('diggCount')), | ||||
|             'repost_count': int_or_none(video_info.get('shareCount')), | ||||
|             'thumbnail': try_get(video_info, lambda x: x['covers'][0]), | ||||
|             'timestamp': timestamp, | ||||
|             'width': width, | ||||
|             'title': str_or_none(share_info.get('title')) or self._og_search_title(webpage), | ||||
|             'creator': str_or_none(author_info.get('nickName')), | ||||
|             'uploader': unique_id, | ||||
|             'uploader_id': str_or_none(author_info.get('userId')), | ||||
|             'uploader_url': 'https://www.tiktok.com/@' + unique_id, | ||||
|             'thumbnails': thumbnails, | ||||
|             'upload_date': date, | ||||
|             'webpage_url': self._og_search_url(webpage), | ||||
|             'description': str_or_none(video_info.get('text')) or str_or_none(share_info.get('desc')), | ||||
|             'ext': 'mp4', | ||||
|             'formats': formats, | ||||
|             'http_headers': { | ||||
|                 'Referer': url, | ||||
|                 'Cookie': 'tt_webid=%s; tt_webid_v2=%s' % (tracker, tracker), | ||||
|             } | ||||
|         } | ||||
|  | ||||
|  | ||||
| class TikTokIE(TikTokBaseIE): | ||||
| class TikTokIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.tiktok\.com/@[\w\._]+/video/(?P<id>\d+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.tiktok.com/@leenabhushan/video/6748451240264420610', | ||||
|         'md5': '34a7543afd5a151b0840ba6736fb633b', | ||||
|         'info_dict': { | ||||
|             'comment_count': int, | ||||
|             'creator': 'facestoriesbyleenabh', | ||||
|             'description': 'md5:a9f6c0c44a1ff2249cae610372d0ae95', | ||||
|             'duration': 13, | ||||
|             'ext': 'mp4', | ||||
|             'formats': list, | ||||
|             'height': 1280, | ||||
|             'id': '6748451240264420610', | ||||
|             'like_count': int, | ||||
|             'repost_count': int, | ||||
|             'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?', | ||||
|             'thumbnails': list, | ||||
|             'timestamp': 1571246252, | ||||
|             'title': 'facestoriesbyleenabh on TikTok', | ||||
|             'upload_date': '20191016', | ||||
|             'ext': 'mp4', | ||||
|             'title': '#jassmanak #lehanga #leenabhushan', | ||||
|             'description': '#jassmanak #lehanga #leenabhushan', | ||||
|             'duration': 13, | ||||
|             'height': 1280, | ||||
|             'width': 720, | ||||
|             'uploader': 'leenabhushan', | ||||
|             'uploader_id': '6691488002098119685', | ||||
|             'uploader_url': r're:https://www.tiktok.com/@leenabhushan', | ||||
|             'webpage_url': r're:https://www.tiktok.com/@leenabhushan/(video/)?6748451240264420610', | ||||
|             'width': 720, | ||||
|             'uploader_url': 'https://www.tiktok.com/@leenabhushan', | ||||
|             'creator': 'facestoriesbyleenabh', | ||||
|             'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?', | ||||
|             'upload_date': '20191016', | ||||
|             'timestamp': 1571246252, | ||||
|             'view_count': int, | ||||
|             'like_count': int, | ||||
|             'repost_count': int, | ||||
|             'comment_count': int, | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'https://www.tiktok.com/@patroxofficial/video/6742501081818877190?langCountry=en', | ||||
|         'md5': '06b9800d47d5fe51a19e322dd86e61c9', | ||||
|         'info_dict': { | ||||
|             'comment_count': int, | ||||
|             'creator': 'patroX', | ||||
|             'id': '6742501081818877190', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'md5:5e2a23877420bb85ce6521dbee39ba94', | ||||
|             'description': 'md5:5e2a23877420bb85ce6521dbee39ba94', | ||||
|             'duration': 27, | ||||
|             'ext': 'mp4', | ||||
|             'formats': list, | ||||
|             'height': 960, | ||||
|             'id': '6742501081818877190', | ||||
|             'width': 540, | ||||
|             'uploader': 'patrox', | ||||
|             'uploader_id': '18702747', | ||||
|             'uploader_url': 'https://www.tiktok.com/@patrox', | ||||
|             'creator': 'patroX', | ||||
|             'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?', | ||||
|             'upload_date': '20190930', | ||||
|             'timestamp': 1569860870, | ||||
|             'view_count': int, | ||||
|             'like_count': int, | ||||
|             'repost_count': int, | ||||
|             'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?', | ||||
|             'thumbnails': list, | ||||
|             'timestamp': 1569860870, | ||||
|             'title': 'patroX on TikTok', | ||||
|             'upload_date': '20190930', | ||||
|             'uploader': 'patroxofficial', | ||||
|             'uploader_id': '18702747', | ||||
|             'uploader_url': r're:https://www.tiktok.com/@patroxofficial', | ||||
|             'webpage_url': r're:https://www.tiktok.com/@patroxofficial/(video/)?6742501081818877190', | ||||
|             'width': 540, | ||||
|             'comment_count': int, | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     def _extract_aweme(self, props_data, webpage, url): | ||||
|         video_info = try_get( | ||||
|             props_data, lambda x: x['pageProps']['itemInfo']['itemStruct'], dict) | ||||
|         author_info = try_get( | ||||
|             props_data, lambda x: x['pageProps']['itemInfo']['itemStruct']['author'], dict) or {} | ||||
|         stats_info = try_get(props_data, lambda x: x['pageProps']['itemInfo']['itemStruct']['stats'], dict) or {} | ||||
|  | ||||
|         user_id = str_or_none(author_info.get('uniqueId')) | ||||
|         download_url = try_get(video_info, (lambda x: x['video']['playAddr'], | ||||
|                                    lambda x: x['video']['downloadAddr'])) | ||||
|         height = try_get(video_info, lambda x: x['video']['height'], int) | ||||
|         width = try_get(video_info, lambda x: x['video']['width'], int) | ||||
|         thumbnails = [{ | ||||
|             'url': video_info.get('thumbnail') or self._og_search_thumbnail(webpage), | ||||
|             'width': width, | ||||
|             'height': height | ||||
|         }] | ||||
|         tracker = try_get(props_data, lambda x: x['initialProps']['$wid']) | ||||
|  | ||||
|         return { | ||||
|             'id': str_or_none(video_info.get('id')), | ||||
|             'url': download_url, | ||||
|             'ext': 'mp4', | ||||
|             'height': height, | ||||
|             'width': width, | ||||
|             'title': video_info.get('desc') or self._og_search_title(webpage), | ||||
|             'duration': try_get(video_info, lambda x: x['video']['duration'], int), | ||||
|             'view_count': int_or_none(stats_info.get('playCount')), | ||||
|             'like_count': int_or_none(stats_info.get('diggCount')), | ||||
|             'repost_count': int_or_none(stats_info.get('shareCount')), | ||||
|             'comment_count': int_or_none(stats_info.get('commentCount')), | ||||
|             'timestamp': try_get(video_info, lambda x: int(x['createTime']), int), | ||||
|             'creator': str_or_none(author_info.get('nickname')), | ||||
|             'uploader': user_id, | ||||
|             'uploader_id': str_or_none(author_info.get('id')), | ||||
|             'uploader_url': f'https://www.tiktok.com/@{user_id}', | ||||
|             'thumbnails': thumbnails, | ||||
|             'description': str_or_none(video_info.get('desc')), | ||||
|             'webpage_url': self._og_search_url(webpage), | ||||
|             'http_headers': { | ||||
|                 'Referer': url, | ||||
|                 'Cookie': 'tt_webid=%s; tt_webid_v2=%s' % (tracker, tracker), | ||||
|             } | ||||
|         } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 pukkandan
					pukkandan