mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	[ie/facebook:ads] Add extractor (#8870)
Closes #8083 Authored by: kclauhk
This commit is contained in:
		| @@ -20,6 +20,7 @@ from ..utils import ( | ||||
|     get_element_by_id, | ||||
|     get_first, | ||||
|     int_or_none, | ||||
|     join_nonempty, | ||||
|     js_to_json, | ||||
|     merge_dicts, | ||||
|     parse_count, | ||||
| @@ -907,3 +908,114 @@ class FacebookReelIE(InfoExtractor): | ||||
|         video_id = self._match_id(url) | ||||
|         return self.url_result( | ||||
|             f'https://m.facebook.com/watch/?v={video_id}&_rdr', FacebookIE, video_id) | ||||
| 
 | ||||
| 
 | ||||
class FacebookAdsIE(InfoExtractor):
    """Extractor for video ads shown on Facebook Ad Library pages (``/ads/library/?id=...``)."""

    _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/ads/library/?\?(?:[^#]+&)?id=(?P<id>\d+)'
    IE_NAME = 'facebook:ads'

    _TESTS = [{
        'url': 'https://www.facebook.com/ads/library/?id=899206155126718',
        'info_dict': {
            'id': '899206155126718',
            'ext': 'mp4',
            'title': 'video by Kandao',
            'uploader': 'Kandao',
            'uploader_id': '774114102743284',
            'uploader_url': r're:^https?://.*',
            'timestamp': 1702548330,
            'thumbnail': r're:^https?://.*',
            'upload_date': '20231214',
            'like_count': int,
        },
    }, {
        'url': 'https://www.facebook.com/ads/library/?id=893637265423481',
        'info_dict': {
            'id': '893637265423481',
            'title': 'Jusqu\u2019\u00e0 -25% sur une s\u00e9lection de vins p\u00e9tillants italiens ',
            'uploader': 'Eataly Paris Marais',
            'uploader_id': '2086668958314152',
            'uploader_url': r're:^https?://.*',
            'timestamp': 1703571529,
            'upload_date': '20231226',
            'like_count': int,
        },
        'playlist_count': 3,
    }, {
        'url': 'https://es-la.facebook.com/ads/library/?id=901230958115569',
        'only_matching': True,
    }, {
        'url': 'https://m.facebook.com/ads/library/?id=901230958115569',
        'only_matching': True,
    }]

    # Source key in the ad's video dict -> (format_id, format_note).
    # The key order is significant: it is passed to qualities() below, so a
    # key's position in this mapping is what ranks the resulting format.
    _FORMATS_MAP = {
        'watermarked_video_sd_url': ('sd-wmk', 'SD, watermarked'),
        'video_sd_url': ('sd', None),
        'watermarked_video_hd_url': ('hd-wmk', 'HD, watermarked'),
        'video_hd_url': ('hd', None),
    }

    def _extract_formats(self, video_dict):
        """Build the formats list for one video/card dict.

        Only keys listed in ``_FORMATS_MAP`` whose value is a valid URL
        produce a format; every other item of ``video_dict`` is ignored.
        """
        # Build the ranking function once instead of once per format
        quality = qualities(tuple(self._FORMATS_MAP))
        return [{
            'format_id': self._FORMATS_MAP[format_key][0],
            'format_note': self._FORMATS_MAP[format_key][1],
            'url': format_url,
            'ext': 'mp4',
            'quality': quality(format_key),
        } for format_key, format_url in traverse_obj(video_dict, (
            {dict.items}, lambda _, v: v[0] in self._FORMATS_MAP and url_or_none(v[1]),
        ))]

    def _real_extract(self, url):
        """Extract a single video, or a playlist when the ad carries several videos/cards.

        Raises ExtractorError when no ad snapshot can be found in the webpage.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The ad data is embedded in the page as JSON arguments of `s.handle(...)` calls
        post_data = [self._parse_json(j, video_id, fatal=False)
                     for j in re.findall(r's\.handle\(({.*})\);requireLazy\(', webpage)]
        data = traverse_obj(post_data, (
            ..., 'require', ..., ..., ..., 'props', 'deeplinkAdCard', 'snapshot', {dict}), get_all=False)
        if not data:
            raise ExtractorError('Unable to extract ad data')

        title = data.get('title')
        # '{{product.name}}' is an unfilled template placeholder, not a usable title
        if not title or title == '{{product.name}}':
            title = join_nonempty('display_format', 'page_name', delim=' by ', from_dict=data)

        info_dict = traverse_obj(data, {
            # Same as above: discard the unfilled template placeholder
            'description': ('link_description', {str}, {lambda x: x if x != '{{product.description}}' else None}),
            'uploader': ('page_name', {str}),
            'uploader_id': ('page_id', {str_or_none}),
            'uploader_url': ('page_profile_uri', {url_or_none}),
            'timestamp': ('creation_time', {int_or_none}),
            'like_count': ('page_like_count', {int_or_none}),
        })

        # Keep every item of 'videos' and 'cards' that offers at least one usable URL.
        # `.get()` is used on purpose: indexing with `v[f]` raises KeyError as soon as
        # any one of the known keys is absent, and an exception raised inside the
        # filter makes the item count as a non-match, so playable entries were skipped.
        playable = traverse_obj(data, (
            ('videos', 'cards'),
            lambda _, v: any(url_or_none(v.get(f)) for f in self._FORMATS_MAP)))

        entries = [{
            'id': f'{video_id}_{idx}',
            'title': entry.get('title') or title,
            'description': entry.get('link_description') or info_dict.get('description'),
            'thumbnail': url_or_none(entry.get('video_preview_image_url')),
            'formats': self._extract_formats(entry),
        } for idx, entry in enumerate(playable, 1)]

        if len(entries) == 1:
            # A lone video is returned as a plain video result rather than a playlist
            info_dict.update(entries[0])
        elif len(entries) > 1:
            info_dict.update({
                'title': entries[0]['title'],
                'entries': entries,
                '_type': 'playlist',
            })
        # With no playable entries, only the ad-level metadata and the id are returned

        # Always report the ad id itself; this overrides the '<id>_1' of a lone entry
        info_dict['id'] = video_id

        return info_dict
|   | ||||
		Reference in New Issue
	
	Block a user
	 kclauhk
					kclauhk