Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2025-10-31 14:45:14 +00:00)
			
		
		
		
	[TikTok] Fix metadata extraction
This commit is contained in:
		| @@ -11,125 +11,102 @@ from ..utils import ( | |||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| class TikTokBaseIE(InfoExtractor): | class TikTokIE(InfoExtractor): | ||||||
|     def _extract_aweme(self, props_data, webpage, url): |  | ||||||
|         video_data = try_get(props_data, lambda x: x['pageProps'], expected_type=dict) |  | ||||||
|         video_info = try_get( |  | ||||||
|             video_data, lambda x: x['itemInfo']['itemStruct'], dict) |  | ||||||
|         author_info = try_get( |  | ||||||
|             video_data, lambda x: x['itemInfo']['itemStruct']['author'], dict) or {} |  | ||||||
|         share_info = try_get(video_data, lambda x: x['itemInfo']['shareMeta'], dict) or {} |  | ||||||
|  |  | ||||||
|         unique_id = str_or_none(author_info.get('uniqueId')) |  | ||||||
|         timestamp = try_get(video_info, lambda x: int(x['createTime']), int) |  | ||||||
|         date = datetime.fromtimestamp(timestamp).strftime('%Y%m%d') |  | ||||||
|  |  | ||||||
|         height = try_get(video_info, lambda x: x['video']['height'], int) |  | ||||||
|         width = try_get(video_info, lambda x: x['video']['width'], int) |  | ||||||
|         thumbnails = [] |  | ||||||
|         thumbnails.append({ |  | ||||||
|             'url': video_info.get('thumbnail') or self._og_search_thumbnail(webpage), |  | ||||||
|             'width': width, |  | ||||||
|             'height': height |  | ||||||
|         }) |  | ||||||
|  |  | ||||||
|         url = '' |  | ||||||
|         if not url: |  | ||||||
|             url = try_get(video_info, lambda x: x['video']['playAddr']) |  | ||||||
|         if not url: |  | ||||||
|             url = try_get(video_info, lambda x: x['video']['downloadAddr']) |  | ||||||
|         formats = [] |  | ||||||
|         formats.append({ |  | ||||||
|             'url': url, |  | ||||||
|             'ext': 'mp4', |  | ||||||
|             'height': height, |  | ||||||
|             'width': width |  | ||||||
|         }) |  | ||||||
|  |  | ||||||
|         tracker = try_get(props_data, lambda x: x['initialProps']['$wid']) |  | ||||||
|         return { |  | ||||||
|             'comment_count': int_or_none(video_info.get('commentCount')), |  | ||||||
|             'duration': try_get(video_info, lambda x: x['video']['videoMeta']['duration'], int), |  | ||||||
|             'height': height, |  | ||||||
|             'id': str_or_none(video_info.get('id')), |  | ||||||
|             'like_count': int_or_none(video_info.get('diggCount')), |  | ||||||
|             'repost_count': int_or_none(video_info.get('shareCount')), |  | ||||||
|             'thumbnail': try_get(video_info, lambda x: x['covers'][0]), |  | ||||||
|             'timestamp': timestamp, |  | ||||||
|             'width': width, |  | ||||||
|             'title': str_or_none(share_info.get('title')) or self._og_search_title(webpage), |  | ||||||
|             'creator': str_or_none(author_info.get('nickName')), |  | ||||||
|             'uploader': unique_id, |  | ||||||
|             'uploader_id': str_or_none(author_info.get('userId')), |  | ||||||
|             'uploader_url': 'https://www.tiktok.com/@' + unique_id, |  | ||||||
|             'thumbnails': thumbnails, |  | ||||||
|             'upload_date': date, |  | ||||||
|             'webpage_url': self._og_search_url(webpage), |  | ||||||
|             'description': str_or_none(video_info.get('text')) or str_or_none(share_info.get('desc')), |  | ||||||
|             'ext': 'mp4', |  | ||||||
|             'formats': formats, |  | ||||||
|             'http_headers': { |  | ||||||
|                 'Referer': url, |  | ||||||
|                 'Cookie': 'tt_webid=%s; tt_webid_v2=%s' % (tracker, tracker), |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class TikTokIE(TikTokBaseIE): |  | ||||||
|     _VALID_URL = r'https?://www\.tiktok\.com/@[\w\._]+/video/(?P<id>\d+)' |     _VALID_URL = r'https?://www\.tiktok\.com/@[\w\._]+/video/(?P<id>\d+)' | ||||||
|  |  | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'https://www.tiktok.com/@leenabhushan/video/6748451240264420610', |         'url': 'https://www.tiktok.com/@leenabhushan/video/6748451240264420610', | ||||||
|         'md5': '34a7543afd5a151b0840ba6736fb633b', |         'md5': '34a7543afd5a151b0840ba6736fb633b', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'comment_count': int, |  | ||||||
|             'creator': 'facestoriesbyleenabh', |  | ||||||
|             'description': 'md5:a9f6c0c44a1ff2249cae610372d0ae95', |  | ||||||
|             'duration': 13, |  | ||||||
|             'ext': 'mp4', |  | ||||||
|             'formats': list, |  | ||||||
|             'height': 1280, |  | ||||||
|             'id': '6748451240264420610', |             'id': '6748451240264420610', | ||||||
|             'like_count': int, |             'ext': 'mp4', | ||||||
|             'repost_count': int, |             'title': '#jassmanak #lehanga #leenabhushan', | ||||||
|             'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?', |             'description': '#jassmanak #lehanga #leenabhushan', | ||||||
|             'thumbnails': list, |             'duration': 13, | ||||||
|             'timestamp': 1571246252, |             'height': 1280, | ||||||
|             'title': 'facestoriesbyleenabh on TikTok', |             'width': 720, | ||||||
|             'upload_date': '20191016', |  | ||||||
|             'uploader': 'leenabhushan', |             'uploader': 'leenabhushan', | ||||||
|             'uploader_id': '6691488002098119685', |             'uploader_id': '6691488002098119685', | ||||||
|             'uploader_url': r're:https://www.tiktok.com/@leenabhushan', |             'uploader_url': 'https://www.tiktok.com/@leenabhushan', | ||||||
|             'webpage_url': r're:https://www.tiktok.com/@leenabhushan/(video/)?6748451240264420610', |             'creator': 'facestoriesbyleenabh', | ||||||
|             'width': 720, |             'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?', | ||||||
|  |             'upload_date': '20191016', | ||||||
|  |             'timestamp': 1571246252, | ||||||
|  |             'view_count': int, | ||||||
|  |             'like_count': int, | ||||||
|  |             'repost_count': int, | ||||||
|  |             'comment_count': int, | ||||||
|         } |         } | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://www.tiktok.com/@patroxofficial/video/6742501081818877190?langCountry=en', |         'url': 'https://www.tiktok.com/@patroxofficial/video/6742501081818877190?langCountry=en', | ||||||
|         'md5': '06b9800d47d5fe51a19e322dd86e61c9', |         'md5': '06b9800d47d5fe51a19e322dd86e61c9', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'comment_count': int, |             'id': '6742501081818877190', | ||||||
|             'creator': 'patroX', |             'ext': 'mp4', | ||||||
|  |             'title': 'md5:5e2a23877420bb85ce6521dbee39ba94', | ||||||
|             'description': 'md5:5e2a23877420bb85ce6521dbee39ba94', |             'description': 'md5:5e2a23877420bb85ce6521dbee39ba94', | ||||||
|             'duration': 27, |             'duration': 27, | ||||||
|             'ext': 'mp4', |  | ||||||
|             'formats': list, |  | ||||||
|             'height': 960, |             'height': 960, | ||||||
|             'id': '6742501081818877190', |             'width': 540, | ||||||
|  |             'uploader': 'patrox', | ||||||
|  |             'uploader_id': '18702747', | ||||||
|  |             'uploader_url': 'https://www.tiktok.com/@patrox', | ||||||
|  |             'creator': 'patroX', | ||||||
|  |             'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?', | ||||||
|  |             'upload_date': '20190930', | ||||||
|  |             'timestamp': 1569860870, | ||||||
|  |             'view_count': int, | ||||||
|             'like_count': int, |             'like_count': int, | ||||||
|             'repost_count': int, |             'repost_count': int, | ||||||
|             'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?', |             'comment_count': int, | ||||||
|             'thumbnails': list, |  | ||||||
|             'timestamp': 1569860870, |  | ||||||
|             'title': 'patroX on TikTok', |  | ||||||
|             'upload_date': '20190930', |  | ||||||
|             'uploader': 'patroxofficial', |  | ||||||
|             'uploader_id': '18702747', |  | ||||||
|             'uploader_url': r're:https://www.tiktok.com/@patroxofficial', |  | ||||||
|             'webpage_url': r're:https://www.tiktok.com/@patroxofficial/(video/)?6742501081818877190', |  | ||||||
|             'width': 540, |  | ||||||
|         } |         } | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|  |     def _extract_aweme(self, props_data, webpage, url): | ||||||
|  |         video_info = try_get( | ||||||
|  |             props_data, lambda x: x['pageProps']['itemInfo']['itemStruct'], dict) | ||||||
|  |         author_info = try_get( | ||||||
|  |             props_data, lambda x: x['pageProps']['itemInfo']['itemStruct']['author'], dict) or {} | ||||||
|  |         stats_info = try_get(props_data, lambda x: x['pageProps']['itemInfo']['itemStruct']['stats'], dict) or {} | ||||||
|  |  | ||||||
|  |         user_id = str_or_none(author_info.get('uniqueId')) | ||||||
|  |         download_url = try_get(video_info, (lambda x: x['video']['playAddr'], | ||||||
|  |                                    lambda x: x['video']['downloadAddr'])) | ||||||
|  |         height = try_get(video_info, lambda x: x['video']['height'], int) | ||||||
|  |         width = try_get(video_info, lambda x: x['video']['width'], int) | ||||||
|  |         thumbnails = [{ | ||||||
|  |             'url': video_info.get('thumbnail') or self._og_search_thumbnail(webpage), | ||||||
|  |             'width': width, | ||||||
|  |             'height': height | ||||||
|  |         }] | ||||||
|  |         tracker = try_get(props_data, lambda x: x['initialProps']['$wid']) | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             'id': str_or_none(video_info.get('id')), | ||||||
|  |             'url': download_url, | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'height': height, | ||||||
|  |             'width': width, | ||||||
|  |             'title': video_info.get('desc') or self._og_search_title(webpage), | ||||||
|  |             'duration': try_get(video_info, lambda x: x['video']['duration'], int), | ||||||
|  |             'view_count': int_or_none(stats_info.get('playCount')), | ||||||
|  |             'like_count': int_or_none(stats_info.get('diggCount')), | ||||||
|  |             'repost_count': int_or_none(stats_info.get('shareCount')), | ||||||
|  |             'comment_count': int_or_none(stats_info.get('commentCount')), | ||||||
|  |             'timestamp': try_get(video_info, lambda x: int(x['createTime']), int), | ||||||
|  |             'creator': str_or_none(author_info.get('nickname')), | ||||||
|  |             'uploader': user_id, | ||||||
|  |             'uploader_id': str_or_none(author_info.get('id')), | ||||||
|  |             'uploader_url': f'https://www.tiktok.com/@{user_id}', | ||||||
|  |             'thumbnails': thumbnails, | ||||||
|  |             'description': str_or_none(video_info.get('desc')), | ||||||
|  |             'webpage_url': self._og_search_url(webpage), | ||||||
|  |             'http_headers': { | ||||||
|  |                 'Referer': url, | ||||||
|  |                 'Cookie': 'tt_webid=%s; tt_webid_v2=%s' % (tracker, tracker), | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         video_id = self._match_id(url) |         video_id = self._match_id(url) | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 pukkandan
					pukkandan