mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-26 12:10:59 +00:00 
			
		
		
		
	[ie/xhamster] Fix extractor (#14446)
Closes #14395 Authored by: shssoichiro, dhwz, dirkf Co-authored-by: dhwz <3697946+dhwz@users.noreply.github.com> Co-authored-by: dirkf <1222880+dirkf@users.noreply.github.com>
This commit is contained in:
		
							parent
							
								
									5f94f05490
								
							
						
					
					
						commit
						739125d40f
					
				| @ -2,6 +2,7 @@ | ||||
| import codecs | ||||
| import itertools | ||||
| import re | ||||
| import string | ||||
| 
 | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
| @ -22,6 +23,47 @@ | ||||
| ) | ||||
| 
 | ||||
| 
 | ||||
| def to_signed_32(n): | ||||
|     return n % ((-1 if n < 0 else 1) * 2**32) | ||||
| 
 | ||||
| 
 | ||||
| class _ByteGenerator: | ||||
|     def __init__(self, algo_id, seed): | ||||
|         try: | ||||
|             self._algorithm = getattr(self, f'_algo{algo_id}') | ||||
|         except AttributeError: | ||||
|             raise ExtractorError(f'Unknown algorithm ID: {algo_id}') | ||||
|         self._s = to_signed_32(seed) | ||||
| 
 | ||||
|     def _algo1(self, s): | ||||
|         # LCG (a=1664525, c=1013904223, m=2^32) | ||||
|         # Ref: https://en.wikipedia.org/wiki/Linear_congruential_generator | ||||
|         s = self._s = to_signed_32(s * 1664525 + 1013904223) | ||||
|         return s | ||||
| 
 | ||||
|     def _algo2(self, s): | ||||
|         # xorshift32 | ||||
|         # Ref: https://en.wikipedia.org/wiki/Xorshift | ||||
|         s = to_signed_32(s ^ (s << 13)) | ||||
|         s = to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 17)) | ||||
|         s = self._s = to_signed_32(s ^ (s << 5)) | ||||
|         return s | ||||
| 
 | ||||
|     def _algo3(self, s): | ||||
|         # Weyl Sequence (k≈2^32*φ, m=2^32) + MurmurHash3 (fmix32) | ||||
|         # Ref: https://en.wikipedia.org/wiki/Weyl_sequence | ||||
|         # https://commons.apache.org/proper/commons-codec/jacoco/org.apache.commons.codec.digest/MurmurHash3.java.html | ||||
|         s = self._s = to_signed_32(s + 0x9e3779b9) | ||||
|         s = to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 16)) | ||||
|         s = to_signed_32(s * to_signed_32(0x85ebca77)) | ||||
|         s = to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 13)) | ||||
|         s = to_signed_32(s * to_signed_32(0xc2b2ae3d)) | ||||
|         return to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 16)) | ||||
| 
 | ||||
|     def __next__(self): | ||||
|         return self._algorithm(self._s) & 0xFF | ||||
| 
 | ||||
| 
 | ||||
| class XHamsterIE(InfoExtractor): | ||||
|     _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.(?:com|desi)|xhday\.com|xhvid\.com)' | ||||
|     _VALID_URL = rf'''(?x) | ||||
| @ -146,6 +188,12 @@ class XHamsterIE(InfoExtractor): | ||||
|     _XOR_KEY = b'xh7999' | ||||
| 
 | ||||
|     def _decipher_format_url(self, format_url, format_id): | ||||
|         if all(char in string.hexdigits for char in format_url): | ||||
|             byte_data = bytes.fromhex(format_url) | ||||
|             seed = int.from_bytes(byte_data[1:5], byteorder='little', signed=True) | ||||
|             byte_gen = _ByteGenerator(byte_data[0], seed) | ||||
|             return bytearray(byte ^ next(byte_gen) for byte in byte_data[5:]).decode('latin-1') | ||||
| 
 | ||||
|         cipher_type, _, ciphertext = try_call( | ||||
|             lambda: base64.b64decode(format_url).decode().partition('_')) or [None] * 3 | ||||
| 
 | ||||
| @ -164,6 +212,16 @@ def _decipher_format_url(self, format_url, format_id): | ||||
|         self.report_warning(f'Skipping format "{format_id}": unsupported cipher type "{cipher_type}"') | ||||
|         return None | ||||
| 
 | ||||
|     def _fixup_formats(self, formats): | ||||
|         for f in formats: | ||||
|             if f.get('vcodec'): | ||||
|                 continue | ||||
|             for vcodec in ('av1', 'h264'): | ||||
|                 if any(f'.{vcodec}.' in f_url for f_url in (f['url'], f.get('manifest_url', ''))): | ||||
|                     f['vcodec'] = vcodec | ||||
|                     break | ||||
|         return formats | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         mobj = self._match_valid_url(url) | ||||
|         video_id = mobj.group('id') or mobj.group('id_2') | ||||
| @ -312,7 +370,8 @@ def get_height(s): | ||||
|                 'comment_count': int_or_none(video.get('comments')), | ||||
|                 'age_limit': age_limit if age_limit is not None else 18, | ||||
|                 'categories': categories, | ||||
|                 'formats': formats, | ||||
|                 'formats': self._fixup_formats(formats), | ||||
|                 '_format_sort_fields': ('res', 'proto', 'tbr'), | ||||
|             } | ||||
| 
 | ||||
|         # Old layout fallback | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Josh Holmer
						Josh Holmer