mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-30 22:25:19 +00:00 
			
		
		
		
	[cda] Implement birthday verification (closes #12789)
This commit is contained in:
		| @@ -1,6 +1,7 @@ | ||||
| version <unreleased> | ||||
|  | ||||
| Extractors | ||||
| + [cda] Support birthday verification (#12789) | ||||
| * [leeco] Fix extraction (#12974) | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -44,6 +44,7 @@ from youtube_dl.utils import ( | ||||
|     limit_length, | ||||
|     mimetype2ext, | ||||
|     month_by_name, | ||||
|     multipart_encode, | ||||
|     ohdave_rsa_encrypt, | ||||
|     OnDemandPagedList, | ||||
|     orderedSet, | ||||
| @@ -620,6 +621,16 @@ class TestUtil(unittest.TestCase): | ||||
|             'http://example.com/path', {'test': '第二行тест'})), | ||||
|             query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82')) | ||||
|  | ||||
|     def test_multipart_encode(self): | ||||
|         self.assertEqual( | ||||
|             multipart_encode({b'field': b'value'}, boundary='AAAAAA')[0], | ||||
|             b'--AAAAAA\r\nContent-Disposition: form-data; name="field"\r\n\r\nvalue\r\n--AAAAAA--\r\n') | ||||
|         self.assertEqual( | ||||
|             multipart_encode({'欄位'.encode('utf-8'): '值'.encode('utf-8')}, boundary='AAAAAA')[0], | ||||
|             b'--AAAAAA\r\nContent-Disposition: form-data; name="\xe6\xac\x84\xe4\xbd\x8d"\r\n\r\n\xe5\x80\xbc\r\n--AAAAAA--\r\n') | ||||
|         self.assertRaises( | ||||
|             ValueError, multipart_encode, {b'field': b'value'}, boundary='value') | ||||
|  | ||||
|     def test_dict_get(self): | ||||
|         FALSE_VALUES = { | ||||
|             'none': None, | ||||
|   | ||||
| @@ -9,7 +9,10 @@ from ..utils import ( | ||||
|     ExtractorError, | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
|     multipart_encode, | ||||
|     parse_duration, | ||||
|     random_birthday, | ||||
|     urljoin, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -27,7 +30,8 @@ class CDAIE(InfoExtractor): | ||||
|             'description': 'md5:269ccd135d550da90d1662651fcb9772', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'average_rating': float, | ||||
|             'duration': 39 | ||||
|             'duration': 39, | ||||
|             'age_limit': 0, | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://www.cda.pl/video/57413289', | ||||
| @@ -41,13 +45,41 @@ class CDAIE(InfoExtractor): | ||||
|             'uploader': 'crash404', | ||||
|             'view_count': int, | ||||
|             'average_rating': float, | ||||
|             'duration': 137 | ||||
|             'duration': 137, | ||||
|             'age_limit': 0, | ||||
|         } | ||||
|     }, { | ||||
|         # Age-restricted | ||||
|         'url': 'http://www.cda.pl/video/1273454c4', | ||||
|         'info_dict': { | ||||
|             'id': '1273454c4', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Bronson (2008) napisy HD 1080p', | ||||
|             'description': 'md5:1b6cb18508daf2dc4e0fa4db77fec24c', | ||||
|             'height': 1080, | ||||
|             'uploader': 'boniek61', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'duration': 5554, | ||||
|             'age_limit': 18, | ||||
|             'view_count': int, | ||||
|             'average_rating': float, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://ebd.cda.pl/0x0/5749950c', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _download_age_confirm_page(self, url, video_id, *args, **kwargs): | ||||
|         form_data = random_birthday('rok', 'miesiac', 'dzien') | ||||
|         form_data.update({'return': url, 'module': 'video', 'module_id': video_id}) | ||||
|         data, content_type = multipart_encode(form_data) | ||||
|         return self._download_webpage( | ||||
|             urljoin(url, '/a/validatebirth'), video_id, *args, | ||||
|             data=data, headers={ | ||||
|                 'Referer': url, | ||||
|                 'Content-Type': content_type, | ||||
|             }, **kwargs) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         self._set_cookie('cda.pl', 'cda.player', 'html5') | ||||
| @@ -57,6 +89,13 @@ class CDAIE(InfoExtractor): | ||||
|         if 'Ten film jest dostępny dla użytkowników premium' in webpage: | ||||
|             raise ExtractorError('This video is only available for premium users.', expected=True) | ||||
|  | ||||
|         need_confirm_age = False | ||||
|         if self._html_search_regex(r'(<form[^>]+action="/a/validatebirth")', | ||||
|                                    webpage, 'birthday validate form', default=None): | ||||
|             webpage = self._download_age_confirm_page( | ||||
|                 url, video_id, note='Confirming age') | ||||
|             need_confirm_age = True | ||||
|  | ||||
|         formats = [] | ||||
|  | ||||
|         uploader = self._search_regex(r'''(?x) | ||||
| @@ -81,6 +120,7 @@ class CDAIE(InfoExtractor): | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'formats': formats, | ||||
|             'duration': None, | ||||
|             'age_limit': 18 if need_confirm_age else 0, | ||||
|         } | ||||
|  | ||||
|         def extract_format(page, version): | ||||
| @@ -121,7 +161,12 @@ class CDAIE(InfoExtractor): | ||||
|         for href, resolution in re.findall( | ||||
|                 r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)', | ||||
|                 webpage): | ||||
|             webpage = self._download_webpage( | ||||
|             if need_confirm_age: | ||||
|                 handler = self._download_age_confirm_page | ||||
|             else: | ||||
|                 handler = self._download_webpage | ||||
|  | ||||
|             webpage = handler( | ||||
|                 self._BASE_URL + href, video_id, | ||||
|                 'Downloading %s version information' % resolution, fatal=False) | ||||
|             if not webpage: | ||||
| @@ -129,6 +174,7 @@ class CDAIE(InfoExtractor): | ||||
|                 # invalid version is requested. | ||||
|                 self.report_warning('Unable to download %s version information' % resolution) | ||||
|                 continue | ||||
|  | ||||
|             extract_format(webpage, resolution) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|   | ||||
| @@ -1,7 +1,6 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import random | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| @@ -11,6 +10,7 @@ from ..utils import ( | ||||
|     float_or_none, | ||||
|     parse_age_limit, | ||||
|     qualities, | ||||
|     random_birthday, | ||||
|     try_get, | ||||
|     unified_timestamp, | ||||
|     urljoin, | ||||
| @@ -47,13 +47,10 @@ class VideoPressIE(InfoExtractor): | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         query = random_birthday('birth_year', 'birth_month', 'birth_day') | ||||
|         video = self._download_json( | ||||
|             'https://public-api.wordpress.com/rest/v1.1/videos/%s' % video_id, | ||||
|             video_id, query={ | ||||
|                 'birth_month': random.randint(1, 12), | ||||
|                 'birth_day': random.randint(1, 31), | ||||
|                 'birth_year': random.randint(1950, 1995), | ||||
|             }) | ||||
|             video_id, query=query) | ||||
|  | ||||
|         title = video['title'] | ||||
|  | ||||
|   | ||||
| @@ -11,6 +11,7 @@ import contextlib | ||||
| import ctypes | ||||
| import datetime | ||||
| import email.utils | ||||
| import email.header | ||||
| import errno | ||||
| import functools | ||||
| import gzip | ||||
| @@ -2097,6 +2098,58 @@ def update_Request(req, url=None, data=None, headers={}, query={}): | ||||
|     return new_req | ||||
|  | ||||
|  | ||||
def try_multipart_encode(data, boundary):
    '''
    Encode a dict as a multipart/form-data body using the given boundary

    data:
        A dict whose keys and values are either Unicode strings (encoded
        as UTF-8 here) or bytes (emitted as is).
    boundary:
        The boundary string; it must be encodable as ASCII.

    Returns a tuple (body, content_type): the encoded body as bytes and the
    matching Content-Type header value.

    Raises ValueError if the boundary occurs inside any encoded field, as
    the resulting body could not be split unambiguously.
    '''
    content_type = 'multipart/form-data; boundary=%s' % boundary
    # Encode the boundary once instead of on every use
    boundary_bytes = boundary.encode('ascii')
    delimiter = b'--' + boundary_bytes + b'\r\n'

    parts = []
    for k, v in data.items():
        if isinstance(k, compat_str):
            k = k.encode('utf-8')
        if isinstance(v, compat_str):
            v = v.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        # Plain concatenation is used because %-formatting of bytes is not
        # available on Python 3.3 and 3.4
        content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
        if boundary_bytes in content:
            raise ValueError('Boundary overlaps with data')
        parts.append(delimiter + content)

    # Closing delimiter
    parts.append(b'--' + boundary_bytes + b'--\r\n')

    return b''.join(parts), content_type
|  | ||||
|  | ||||
def multipart_encode(data, boundary=None):
    '''
    Serialize a dict as an RFC 7578 multipart/form-data body

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        A Unicode object to use as the boundary. When omitted, random
        boundaries are generated until one does not clash with the data.

    Returns the (body, content_type) tuple produced by
    try_multipart_encode. Raises ValueError if an explicitly given boundary
    overlaps with the data.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    if boundary is not None:
        # A caller-supplied boundary is tried exactly once; a clash with
        # the data propagates to the caller as ValueError
        return try_multipart_encode(data, boundary)

    while True:
        candidate = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return try_multipart_encode(data, candidate)
        except ValueError:
            # The random boundary occurs in the data; draw another one
            continue
|  | ||||
|  | ||||
| def dict_get(d, key_or_keys, default=None, skip_false_values=True): | ||||
|     if isinstance(key_or_keys, (list, tuple)): | ||||
|         for key in key_or_keys: | ||||
| @@ -3760,3 +3813,11 @@ def write_xattr(path, key, value): | ||||
|                         "Couldn't find a tool to set the xattrs. " | ||||
|                         "Install either the python 'xattr' module, " | ||||
|                         "or the 'xattr' binary.") | ||||
|  | ||||
|  | ||||
def random_birthday(year_field, month_field, day_field):
    '''
    Generate a random but valid birthday as a dict of form fields

    year_field, month_field, day_field:
        The keys under which the year, month and day are stored.

    Returns a dict mapping the three keys to decimal strings (without zero
    padding) of a date between 1950-01-01 and 1995-12-31 inclusive.
    '''
    # Pick an actual calendar date rather than independent month and day
    # numbers, which could yield impossible dates such as 31 February
    start_date = datetime.date(1950, 1, 1)
    end_date = datetime.date(1995, 12, 31)
    offset = random.randint(0, (end_date - start_date).days)
    birthday = start_date + datetime.timedelta(days=offset)
    return {
        year_field: str(birthday.year),
        month_field: str(birthday.month),
        day_field: str(birthday.day),
    }
|   | ||||
		Reference in New Issue
	
	Block a user
	 Yen Chi Hsuan
					Yen Chi Hsuan