Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2025-10-31 22:55:18 +00:00)
			
		
		
		
	[ie/cda] Fix age-gated web extraction (#9939)
Closes #5980, Closes #6638
Authored by: Podiumnoche, Szpachlarz, dirkf, emqi
This commit is contained in:
		| @@ -16,7 +16,6 @@ from ..utils import ( | |||||||
|     merge_dicts, |     merge_dicts, | ||||||
|     multipart_encode, |     multipart_encode, | ||||||
|     parse_duration, |     parse_duration, | ||||||
|     random_birthday, |  | ||||||
|     traverse_obj, |     traverse_obj, | ||||||
|     try_call, |     try_call, | ||||||
|     try_get, |     try_get, | ||||||
| @@ -63,38 +62,57 @@ class CDAIE(InfoExtractor): | |||||||
|             'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a', |             'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a', | ||||||
|             'thumbnail': r're:^https?://.*\.jpg$', |             'thumbnail': r're:^https?://.*\.jpg$', | ||||||
|             'uploader': 'crash404', |             'uploader': 'crash404', | ||||||
|             'view_count': int, |  | ||||||
|             'average_rating': float, |             'average_rating': float, | ||||||
|             'duration': 137, |             'duration': 137, | ||||||
|             'age_limit': 0, |             'age_limit': 0, | ||||||
|  |             'upload_date': '20160220', | ||||||
|  |             'timestamp': 1455968218, | ||||||
|         } |         } | ||||||
|     }, { |     }, { | ||||||
|         # Age-restricted |         # Age-restricted with vfilm redirection | ||||||
|         'url': 'http://www.cda.pl/video/1273454c4', |         'url': 'https://www.cda.pl/video/8753244c4', | ||||||
|  |         'md5': 'd8eeb83d63611289507010d3df3bb8b3', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '1273454c4', |             'id': '8753244c4', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': 'Bronson (2008) napisy HD 1080p', |             'title': '[18+] Bez Filtra: Rezerwowe Psy czyli...  najwulgarniejsza polska gra?', | ||||||
|             'description': 'md5:1b6cb18508daf2dc4e0fa4db77fec24c', |             'description': 'md5:ae80bac31bd6a9f077a6cce03c7c077e', | ||||||
|             'height': 1080, |             'height': 1080, | ||||||
|             'uploader': 'boniek61', |             'uploader': 'arhn eu', | ||||||
|             'thumbnail': r're:^https?://.*\.jpg$', |             'thumbnail': r're:^https?://.*\.jpg$', | ||||||
|             'duration': 5554, |             'duration': 991, | ||||||
|             'age_limit': 18, |             'age_limit': 18, | ||||||
|             'view_count': int, |  | ||||||
|             'average_rating': float, |             'average_rating': float, | ||||||
|         }, |             'timestamp': 1633888264, | ||||||
|  |             'upload_date': '20211010', | ||||||
|  |         } | ||||||
|  |     }, { | ||||||
|  |         # Age-restricted without vfilm redirection | ||||||
|  |         'url': 'https://www.cda.pl/video/17028157b8', | ||||||
|  |         'md5': 'c1fe5ff4582bace95d4f0ce0fbd0f992', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '17028157b8', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'title': 'STENDUPY MICHAŁ OGIŃSKI', | ||||||
|  |             'description': 'md5:5851f3272bfc31f762d616040a1d609a', | ||||||
|  |             'height': 480, | ||||||
|  |             'uploader': 'oginski', | ||||||
|  |             'thumbnail': r're:^https?://.*\.jpg$', | ||||||
|  |             'duration': 18855, | ||||||
|  |             'age_limit': 18, | ||||||
|  |             'average_rating': float, | ||||||
|  |             'timestamp': 1699705901, | ||||||
|  |             'upload_date': '20231111', | ||||||
|  |         } | ||||||
|     }, { |     }, { | ||||||
|         'url': 'http://ebd.cda.pl/0x0/5749950c', |         'url': 'http://ebd.cda.pl/0x0/5749950c', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|     }] |     }] | ||||||
| 
 | 
 | ||||||
|     def _download_age_confirm_page(self, url, video_id, *args, **kwargs): |     def _download_age_confirm_page(self, url, video_id, *args, **kwargs): | ||||||
|         form_data = random_birthday('rok', 'miesiac', 'dzien') |         data, content_type = multipart_encode({'age_confirm': ''}) | ||||||
|         form_data.update({'return': url, 'module': 'video', 'module_id': video_id}) |  | ||||||
|         data, content_type = multipart_encode(form_data) |  | ||||||
|         return self._download_webpage( |         return self._download_webpage( | ||||||
|             urljoin(url, '/a/validatebirth'), video_id, *args, |             url, video_id, *args, | ||||||
|             data=data, headers={ |             data=data, headers={ | ||||||
|                 'Referer': url, |                 'Referer': url, | ||||||
|                 'Content-Type': content_type, |                 'Content-Type': content_type, | ||||||
| @@ -164,7 +182,7 @@ class CDAIE(InfoExtractor): | |||||||
|         if 'Authorization' in self._API_HEADERS: |         if 'Authorization' in self._API_HEADERS: | ||||||
|             return self._api_extract(video_id) |             return self._api_extract(video_id) | ||||||
|         else: |         else: | ||||||
|             return self._web_extract(video_id, url) |             return self._web_extract(video_id) | ||||||
| 
 | 
 | ||||||
|     def _api_extract(self, video_id): |     def _api_extract(self, video_id): | ||||||
|         meta = self._download_json( |         meta = self._download_json( | ||||||
| @@ -197,9 +215,9 @@ class CDAIE(InfoExtractor): | |||||||
|             'view_count': meta.get('views'), |             'view_count': meta.get('views'), | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|     def _web_extract(self, video_id, url): |     def _web_extract(self, video_id): | ||||||
|         self._set_cookie('cda.pl', 'cda.player', 'html5') |         self._set_cookie('cda.pl', 'cda.player', 'html5') | ||||||
|         webpage = self._download_webpage( |         webpage, urlh = self._download_webpage_handle( | ||||||
|             f'{self._BASE_URL}/video/{video_id}/vfilm', video_id) |             f'{self._BASE_URL}/video/{video_id}/vfilm', video_id) | ||||||
| 
 | 
 | ||||||
|         if 'Ten film jest dostępny dla użytkowników premium' in webpage: |         if 'Ten film jest dostępny dla użytkowników premium' in webpage: | ||||||
| @@ -209,10 +227,10 @@ class CDAIE(InfoExtractor): | |||||||
|             self.raise_geo_restricted() |             self.raise_geo_restricted() | ||||||
| 
 | 
 | ||||||
|         need_confirm_age = False |         need_confirm_age = False | ||||||
|         if self._html_search_regex(r'(<form[^>]+action="[^"]*/a/validatebirth[^"]*")', |         if self._html_search_regex(r'(<button[^>]+name="[^"]*age_confirm[^"]*")', | ||||||
|                                    webpage, 'birthday validate form', default=None): |                                    webpage, 'birthday validate form', default=None): | ||||||
|             webpage = self._download_age_confirm_page( |             webpage = self._download_age_confirm_page( | ||||||
|                 url, video_id, note='Confirming age') |                 urlh.url, video_id, note='Confirming age') | ||||||
|             need_confirm_age = True |             need_confirm_age = True | ||||||
| 
 | 
 | ||||||
|         formats = [] |         formats = [] | ||||||
| @@ -222,9 +240,6 @@ class CDAIE(InfoExtractor): | |||||||
|             (?:<\1[^>]*>[^<]*</\1>|(?!</\1>)(?:.|\n))*? |             (?:<\1[^>]*>[^<]*</\1>|(?!</\1>)(?:.|\n))*? | ||||||
|             <(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3> |             <(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3> | ||||||
|         ''', webpage, 'uploader', default=None, group='uploader') |         ''', webpage, 'uploader', default=None, group='uploader') | ||||||
|         view_count = self._search_regex( |  | ||||||
|         r'Odsłony:(?:\s|&nbsp;)*([0-9]+)', webpage, |  | ||||||
|             'view_count', default=None) |  | ||||||
|         average_rating = self._search_regex( |         average_rating = self._search_regex( | ||||||
|             (r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)', |             (r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)', | ||||||
|              r'<span[^>]+\bclass=["\']rating["\'][^>]*>(?P<rating_value>[0-9.]+)'), webpage, 'rating', fatal=False, |              r'<span[^>]+\bclass=["\']rating["\'][^>]*>(?P<rating_value>[0-9.]+)'), webpage, 'rating', fatal=False, | ||||||
| @@ -235,7 +250,6 @@ class CDAIE(InfoExtractor): | |||||||
|             'title': self._og_search_title(webpage), |             'title': self._og_search_title(webpage), | ||||||
|             'description': self._og_search_description(webpage), |             'description': self._og_search_description(webpage), | ||||||
|             'uploader': uploader, |             'uploader': uploader, | ||||||
|             'view_count': int_or_none(view_count), |  | ||||||
|             'average_rating': float_or_none(average_rating), |             'average_rating': float_or_none(average_rating), | ||||||
|             'thumbnail': self._og_search_thumbnail(webpage), |             'thumbnail': self._og_search_thumbnail(webpage), | ||||||
|             'formats': formats, |             'formats': formats, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Podiumnoche
					Podiumnoche