Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2025-10-30 22:25:19 +00:00)
			
		
		
		
	[cda] Implement birthday verification (closes #12789)
This commit is contained in:
		| @@ -1,6 +1,7 @@ | |||||||
| version <unreleased> | version <unreleased> | ||||||
|  |  | ||||||
| Extractors | Extractors | ||||||
|  | + [cda] Support birthday verification (#12789) | ||||||
| * [leeco] Fix extraction (#12974) | * [leeco] Fix extraction (#12974) | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
| @@ -44,6 +44,7 @@ from youtube_dl.utils import ( | |||||||
|     limit_length, |     limit_length, | ||||||
|     mimetype2ext, |     mimetype2ext, | ||||||
|     month_by_name, |     month_by_name, | ||||||
|  |     multipart_encode, | ||||||
|     ohdave_rsa_encrypt, |     ohdave_rsa_encrypt, | ||||||
|     OnDemandPagedList, |     OnDemandPagedList, | ||||||
|     orderedSet, |     orderedSet, | ||||||
| @@ -620,6 +621,16 @@ class TestUtil(unittest.TestCase): | |||||||
|             'http://example.com/path', {'test': '第二行тест'})), |             'http://example.com/path', {'test': '第二行тест'})), | ||||||
|             query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82')) |             query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82')) | ||||||
|  |  | ||||||
|  |     def test_multipart_encode(self): | ||||||
|  |         self.assertEqual( | ||||||
|  |             multipart_encode({b'field': b'value'}, boundary='AAAAAA')[0], | ||||||
|  |             b'--AAAAAA\r\nContent-Disposition: form-data; name="field"\r\n\r\nvalue\r\n--AAAAAA--\r\n') | ||||||
|  |         self.assertEqual( | ||||||
|  |             multipart_encode({'欄位'.encode('utf-8'): '值'.encode('utf-8')}, boundary='AAAAAA')[0], | ||||||
|  |             b'--AAAAAA\r\nContent-Disposition: form-data; name="\xe6\xac\x84\xe4\xbd\x8d"\r\n\r\n\xe5\x80\xbc\r\n--AAAAAA--\r\n') | ||||||
|  |         self.assertRaises( | ||||||
|  |             ValueError, multipart_encode, {b'field': b'value'}, boundary='value') | ||||||
|  |  | ||||||
|     def test_dict_get(self): |     def test_dict_get(self): | ||||||
|         FALSE_VALUES = { |         FALSE_VALUES = { | ||||||
|             'none': None, |             'none': None, | ||||||
|   | |||||||
| @@ -9,7 +9,10 @@ from ..utils import ( | |||||||
|     ExtractorError, |     ExtractorError, | ||||||
|     float_or_none, |     float_or_none, | ||||||
|     int_or_none, |     int_or_none, | ||||||
|  |     multipart_encode, | ||||||
|     parse_duration, |     parse_duration, | ||||||
|  |     random_birthday, | ||||||
|  |     urljoin, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -27,7 +30,8 @@ class CDAIE(InfoExtractor): | |||||||
|             'description': 'md5:269ccd135d550da90d1662651fcb9772', |             'description': 'md5:269ccd135d550da90d1662651fcb9772', | ||||||
|             'thumbnail': r're:^https?://.*\.jpg$', |             'thumbnail': r're:^https?://.*\.jpg$', | ||||||
|             'average_rating': float, |             'average_rating': float, | ||||||
|             'duration': 39 |             'duration': 39, | ||||||
|  |             'age_limit': 0, | ||||||
|         } |         } | ||||||
|     }, { |     }, { | ||||||
|         'url': 'http://www.cda.pl/video/57413289', |         'url': 'http://www.cda.pl/video/57413289', | ||||||
| @@ -41,13 +45,41 @@ class CDAIE(InfoExtractor): | |||||||
|             'uploader': 'crash404', |             'uploader': 'crash404', | ||||||
|             'view_count': int, |             'view_count': int, | ||||||
|             'average_rating': float, |             'average_rating': float, | ||||||
|             'duration': 137 |             'duration': 137, | ||||||
|  |             'age_limit': 0, | ||||||
|         } |         } | ||||||
|  |     }, { | ||||||
|  |         # Age-restricted | ||||||
|  |         'url': 'http://www.cda.pl/video/1273454c4', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '1273454c4', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'title': 'Bronson (2008) napisy HD 1080p', | ||||||
|  |             'description': 'md5:1b6cb18508daf2dc4e0fa4db77fec24c', | ||||||
|  |             'height': 1080, | ||||||
|  |             'uploader': 'boniek61', | ||||||
|  |             'thumbnail': r're:^https?://.*\.jpg$', | ||||||
|  |             'duration': 5554, | ||||||
|  |             'age_limit': 18, | ||||||
|  |             'view_count': int, | ||||||
|  |             'average_rating': float, | ||||||
|  |         }, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'http://ebd.cda.pl/0x0/5749950c', |         'url': 'http://ebd.cda.pl/0x0/5749950c', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|  |     def _download_age_confirm_page(self, url, video_id, *args, **kwargs): | ||||||
|  |         form_data = random_birthday('rok', 'miesiac', 'dzien') | ||||||
|  |         form_data.update({'return': url, 'module': 'video', 'module_id': video_id}) | ||||||
|  |         data, content_type = multipart_encode(form_data) | ||||||
|  |         return self._download_webpage( | ||||||
|  |             urljoin(url, '/a/validatebirth'), video_id, *args, | ||||||
|  |             data=data, headers={ | ||||||
|  |                 'Referer': url, | ||||||
|  |                 'Content-Type': content_type, | ||||||
|  |             }, **kwargs) | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         video_id = self._match_id(url) |         video_id = self._match_id(url) | ||||||
|         self._set_cookie('cda.pl', 'cda.player', 'html5') |         self._set_cookie('cda.pl', 'cda.player', 'html5') | ||||||
| @@ -57,6 +89,13 @@ class CDAIE(InfoExtractor): | |||||||
|         if 'Ten film jest dostępny dla użytkowników premium' in webpage: |         if 'Ten film jest dostępny dla użytkowników premium' in webpage: | ||||||
|             raise ExtractorError('This video is only available for premium users.', expected=True) |             raise ExtractorError('This video is only available for premium users.', expected=True) | ||||||
|  |  | ||||||
|  |         need_confirm_age = False | ||||||
|  |         if self._html_search_regex(r'(<form[^>]+action="/a/validatebirth")', | ||||||
|  |                                    webpage, 'birthday validate form', default=None): | ||||||
|  |             webpage = self._download_age_confirm_page( | ||||||
|  |                 url, video_id, note='Confirming age') | ||||||
|  |             need_confirm_age = True | ||||||
|  |  | ||||||
|         formats = [] |         formats = [] | ||||||
|  |  | ||||||
|         uploader = self._search_regex(r'''(?x) |         uploader = self._search_regex(r'''(?x) | ||||||
| @@ -81,6 +120,7 @@ class CDAIE(InfoExtractor): | |||||||
|             'thumbnail': self._og_search_thumbnail(webpage), |             'thumbnail': self._og_search_thumbnail(webpage), | ||||||
|             'formats': formats, |             'formats': formats, | ||||||
|             'duration': None, |             'duration': None, | ||||||
|  |             'age_limit': 18 if need_confirm_age else 0, | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         def extract_format(page, version): |         def extract_format(page, version): | ||||||
| @@ -121,7 +161,12 @@ class CDAIE(InfoExtractor): | |||||||
|         for href, resolution in re.findall( |         for href, resolution in re.findall( | ||||||
|                 r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)', |                 r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)', | ||||||
|                 webpage): |                 webpage): | ||||||
|             webpage = self._download_webpage( |             if need_confirm_age: | ||||||
|  |                 handler = self._download_age_confirm_page | ||||||
|  |             else: | ||||||
|  |                 handler = self._download_webpage | ||||||
|  |  | ||||||
|  |             webpage = handler( | ||||||
|                 self._BASE_URL + href, video_id, |                 self._BASE_URL + href, video_id, | ||||||
|                 'Downloading %s version information' % resolution, fatal=False) |                 'Downloading %s version information' % resolution, fatal=False) | ||||||
|             if not webpage: |             if not webpage: | ||||||
| @@ -129,6 +174,7 @@ class CDAIE(InfoExtractor): | |||||||
|                 # invalid version is requested. |                 # invalid version is requested. | ||||||
|                 self.report_warning('Unable to download %s version information' % resolution) |                 self.report_warning('Unable to download %s version information' % resolution) | ||||||
|                 continue |                 continue | ||||||
|  |  | ||||||
|             extract_format(webpage, resolution) |             extract_format(webpage, resolution) | ||||||
|  |  | ||||||
|         self._sort_formats(formats) |         self._sort_formats(formats) | ||||||
|   | |||||||
| @@ -1,7 +1,6 @@ | |||||||
| # coding: utf-8 | # coding: utf-8 | ||||||
| from __future__ import unicode_literals | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| import random |  | ||||||
| import re | import re | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| @@ -11,6 +10,7 @@ from ..utils import ( | |||||||
|     float_or_none, |     float_or_none, | ||||||
|     parse_age_limit, |     parse_age_limit, | ||||||
|     qualities, |     qualities, | ||||||
|  |     random_birthday, | ||||||
|     try_get, |     try_get, | ||||||
|     unified_timestamp, |     unified_timestamp, | ||||||
|     urljoin, |     urljoin, | ||||||
| @@ -47,13 +47,10 @@ class VideoPressIE(InfoExtractor): | |||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         video_id = self._match_id(url) |         video_id = self._match_id(url) | ||||||
|  |  | ||||||
|  |         query = random_birthday('birth_year', 'birth_month', 'birth_day') | ||||||
|         video = self._download_json( |         video = self._download_json( | ||||||
|             'https://public-api.wordpress.com/rest/v1.1/videos/%s' % video_id, |             'https://public-api.wordpress.com/rest/v1.1/videos/%s' % video_id, | ||||||
|             video_id, query={ |             video_id, query=query) | ||||||
|                 'birth_month': random.randint(1, 12), |  | ||||||
|                 'birth_day': random.randint(1, 31), |  | ||||||
|                 'birth_year': random.randint(1950, 1995), |  | ||||||
|             }) |  | ||||||
|  |  | ||||||
|         title = video['title'] |         title = video['title'] | ||||||
|  |  | ||||||
|   | |||||||
| @@ -11,6 +11,7 @@ import contextlib | |||||||
| import ctypes | import ctypes | ||||||
| import datetime | import datetime | ||||||
| import email.utils | import email.utils | ||||||
|  | import email.header | ||||||
| import errno | import errno | ||||||
| import functools | import functools | ||||||
| import gzip | import gzip | ||||||
| @@ -2097,6 +2098,58 @@ def update_Request(req, url=None, data=None, headers={}, query={}): | |||||||
|     return new_req |     return new_req | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def try_multipart_encode(data, boundary): | ||||||
|  |     content_type = 'multipart/form-data; boundary=%s' % boundary | ||||||
|  |  | ||||||
|  |     out = b'' | ||||||
|  |     for k, v in data.items(): | ||||||
|  |         out += b'--' + boundary.encode('ascii') + b'\r\n' | ||||||
|  |         if isinstance(k, compat_str): | ||||||
|  |             k = k.encode('utf-8') | ||||||
|  |         if isinstance(v, compat_str): | ||||||
|  |             v = v.encode('utf-8') | ||||||
|  |         # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578 | ||||||
|  |         # suggests sending UTF-8 directly. Firefox sends UTF-8, too | ||||||
|  |         content = b'Content-Disposition: form-data; name="%s"\r\n\r\n' % k + v + b'\r\n' | ||||||
|  |         if boundary.encode('ascii') in content: | ||||||
|  |             raise ValueError('Boundary overlaps with data') | ||||||
|  |         out += content | ||||||
|  |  | ||||||
|  |     out += b'--' + boundary.encode('ascii') + b'--\r\n' | ||||||
|  |  | ||||||
|  |     return out, content_type | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def multipart_encode(data, boundary=None): | ||||||
|  |     ''' | ||||||
|  |     Encode a dict to RFC 7578-compliant form-data | ||||||
|  |  | ||||||
|  |     data: | ||||||
|  |         A dict where keys and values can be either Unicode or bytes-like | ||||||
|  |         objects. | ||||||
|  |     boundary: | ||||||
|  |         If a Unicode object is specified, it is used as the boundary. Otherwise | ||||||
|  |         a random boundary is generated. | ||||||
|  |  | ||||||
|  |     Reference: https://tools.ietf.org/html/rfc7578 | ||||||
|  |     ''' | ||||||
|  |     has_specified_boundary = boundary is not None | ||||||
|  |  | ||||||
|  |     while True: | ||||||
|  |         if boundary is None: | ||||||
|  |             boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff)) | ||||||
|  |  | ||||||
|  |         try: | ||||||
|  |             out, content_type = try_multipart_encode(data, boundary) | ||||||
|  |             break | ||||||
|  |         except ValueError: | ||||||
|  |             if has_specified_boundary: | ||||||
|  |                 raise | ||||||
|  |             boundary = None | ||||||
|  |  | ||||||
|  |     return out, content_type | ||||||
|  |  | ||||||
|  |  | ||||||
| def dict_get(d, key_or_keys, default=None, skip_false_values=True): | def dict_get(d, key_or_keys, default=None, skip_false_values=True): | ||||||
|     if isinstance(key_or_keys, (list, tuple)): |     if isinstance(key_or_keys, (list, tuple)): | ||||||
|         for key in key_or_keys: |         for key in key_or_keys: | ||||||
| @@ -3760,3 +3813,11 @@ def write_xattr(path, key, value): | |||||||
|                         "Couldn't find a tool to set the xattrs. " |                         "Couldn't find a tool to set the xattrs. " | ||||||
|                         "Install either the python 'xattr' module, " |                         "Install either the python 'xattr' module, " | ||||||
|                         "or the 'xattr' binary.") |                         "or the 'xattr' binary.") | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def random_birthday(year_field, month_field, day_field): | ||||||
|  |     return { | ||||||
|  |         year_field: str(random.randint(1950, 1995)), | ||||||
|  |         month_field: str(random.randint(1, 12)), | ||||||
|  |         day_field: str(random.randint(1, 31)), | ||||||
|  |     } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Yen Chi Hsuan
					Yen Chi Hsuan