mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-26 20:21:05 +00:00 
			
		
		
		
	[ie/xhamster] Fix extractor (#14446)
Closes #14395
Authored by: shssoichiro, dhwz, dirkf

Co-authored-by: dhwz <3697946+dhwz@users.noreply.github.com>
Co-authored-by: dirkf <1222880+dirkf@users.noreply.github.com>
This commit is contained in:
		
							parent
							
								
									5f94f05490
								
							
						
					
					
						commit
						739125d40f
					
				| @ -2,6 +2,7 @@ | |||||||
| import codecs | import codecs | ||||||
| import itertools | import itertools | ||||||
| import re | import re | ||||||
|  | import string | ||||||
| 
 | 
 | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
| @ -22,6 +23,47 @@ | |||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
def to_signed_32(n):
    """Reduce *n* modulo 2**32 while keeping its sign.

    Non-negative input maps into ``[0, 2**32)``; negative input maps into
    ``(-2**32, 0]``.  Note that, despite the name, values at or above
    ``2**31`` are NOT wrapped to negative numbers - the result is only
    guaranteed to be congruent to *n* modulo 2**32.
    """
    modulus = 1 << 32
    if n < 0:
        # A negative modulus makes Python's ``%`` yield a non-positive result.
        return n % -modulus
    return n % modulus
|  | 
 | ||||||
|  | 
 | ||||||
class _ByteGenerator:
    """Endless stream of pseudo-random bytes produced by a selectable PRNG.

    ``algo_id`` picks the ``_algo<algo_id>`` method used to advance the
    generator; ``seed`` is the initial state (reduced with ``to_signed_32``).
    Each ``next()`` call advances the state once and yields the low 8 bits
    of the algorithm's output.  The object is its own iterator and never
    raises ``StopIteration``, so bound it with ``zip``/``islice`` when
    looping over it.

    Raises ExtractorError if no ``_algo<algo_id>`` method exists.
    """

    def __init__(self, algo_id, seed):
        try:
            self._algorithm = getattr(self, f'_algo{algo_id}')
        except AttributeError:
            # The AttributeError is an implementation detail of the lookup;
            # suppress it so only the meaningful error is reported.
            raise ExtractorError(f'Unknown algorithm ID: {algo_id}') from None
        self._s = to_signed_32(seed)

    def _algo1(self, s):
        # LCG (a=1664525, c=1013904223, m=2^32)
        # Ref: https://en.wikipedia.org/wiki/Linear_congruential_generator
        # The stored state and the returned value are the same number.
        s = self._s = to_signed_32(s * 1664525 + 1013904223)
        return s

    def _algo2(self, s):
        # xorshift32
        # Ref: https://en.wikipedia.org/wiki/Xorshift
        s = to_signed_32(s ^ (s << 13))
        # Mask to 32 bits first so that ">>" acts as an unsigned (logical) shift
        s = to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 17))
        s = self._s = to_signed_32(s ^ (s << 5))
        return s

    def _algo3(self, s):
        # Weyl Sequence (k=0x9e3779b9≈2^32/φ, m=2^32) + MurmurHash3-style finalizer (fmix32)
        # Ref: https://en.wikipedia.org/wiki/Weyl_sequence
        # https://commons.apache.org/proper/commons-codec/jacoco/org.apache.commons.codec.digest/MurmurHash3.java.html
        # Only the Weyl counter is kept as state; the mixing below affects the output alone.
        s = self._s = to_signed_32(s + 0x9e3779b9)
        # NOTE(review): the two multipliers differ from the reference fmix32
        # constants (0x85ebca6b / 0xc2b2ae35); presumably they mirror the
        # site's own implementation - confirm before "correcting" them.
        s = to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 16))
        s = to_signed_32(s * to_signed_32(0x85ebca77))
        s = to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 13))
        s = to_signed_32(s * to_signed_32(0xc2b2ae3d))
        return to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 16))

    def __iter__(self):
        # Required by the iterator protocol alongside __next__
        return self

    def __next__(self):
        return self._algorithm(self._s) & 0xFF
|  | 
 | ||||||
|  | 
 | ||||||
| class XHamsterIE(InfoExtractor): | class XHamsterIE(InfoExtractor): | ||||||
|     _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.(?:com|desi)|xhday\.com|xhvid\.com)' |     _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.(?:com|desi)|xhday\.com|xhvid\.com)' | ||||||
|     _VALID_URL = rf'''(?x) |     _VALID_URL = rf'''(?x) | ||||||
| @ -146,6 +188,12 @@ class XHamsterIE(InfoExtractor): | |||||||
|     _XOR_KEY = b'xh7999' |     _XOR_KEY = b'xh7999' | ||||||
| 
 | 
 | ||||||
|     def _decipher_format_url(self, format_url, format_id): |     def _decipher_format_url(self, format_url, format_id): | ||||||
|  |         if all(char in string.hexdigits for char in format_url): | ||||||
|  |             byte_data = bytes.fromhex(format_url) | ||||||
|  |             seed = int.from_bytes(byte_data[1:5], byteorder='little', signed=True) | ||||||
|  |             byte_gen = _ByteGenerator(byte_data[0], seed) | ||||||
|  |             return bytearray(byte ^ next(byte_gen) for byte in byte_data[5:]).decode('latin-1') | ||||||
|  | 
 | ||||||
|         cipher_type, _, ciphertext = try_call( |         cipher_type, _, ciphertext = try_call( | ||||||
|             lambda: base64.b64decode(format_url).decode().partition('_')) or [None] * 3 |             lambda: base64.b64decode(format_url).decode().partition('_')) or [None] * 3 | ||||||
| 
 | 
 | ||||||
| @ -164,6 +212,16 @@ def _decipher_format_url(self, format_url, format_id): | |||||||
|         self.report_warning(f'Skipping format "{format_id}": unsupported cipher type "{cipher_type}"') |         self.report_warning(f'Skipping format "{format_id}": unsupported cipher type "{cipher_type}"') | ||||||
|         return None |         return None | ||||||
| 
 | 
 | ||||||
|  |     def _fixup_formats(self, formats): | ||||||
|  |         for f in formats: | ||||||
|  |             if f.get('vcodec'): | ||||||
|  |                 continue | ||||||
|  |             for vcodec in ('av1', 'h264'): | ||||||
|  |                 if any(f'.{vcodec}.' in f_url for f_url in (f['url'], f.get('manifest_url', ''))): | ||||||
|  |                     f['vcodec'] = vcodec | ||||||
|  |                     break | ||||||
|  |         return formats | ||||||
|  | 
 | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         mobj = self._match_valid_url(url) |         mobj = self._match_valid_url(url) | ||||||
|         video_id = mobj.group('id') or mobj.group('id_2') |         video_id = mobj.group('id') or mobj.group('id_2') | ||||||
| @ -312,7 +370,8 @@ def get_height(s): | |||||||
|                 'comment_count': int_or_none(video.get('comments')), |                 'comment_count': int_or_none(video.get('comments')), | ||||||
|                 'age_limit': age_limit if age_limit is not None else 18, |                 'age_limit': age_limit if age_limit is not None else 18, | ||||||
|                 'categories': categories, |                 'categories': categories, | ||||||
|                 'formats': formats, |                 'formats': self._fixup_formats(formats), | ||||||
|  |                 '_format_sort_fields': ('res', 'proto', 'tbr'), | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|         # Old layout fallback |         # Old layout fallback | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Josh Holmer
						Josh Holmer