mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[cleanup] Remove dead extractors (#8604)
Closes #1609, Closes #3232, Closes #4763, Closes #6026, Closes #6322, Closes #7912. Authored by: seproDev
This commit is contained in:
		| @@ -1,13 +1,20 @@ | ||||
| import re | ||||
| 
 | ||||
| from .common import InfoExtractor | ||||
| from ..aes import aes_decrypt_text | ||||
| from ..compat import compat_urllib_parse_unquote | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     format_field, | ||||
|     int_or_none, | ||||
|     str_to_int, | ||||
|     strip_or_none, | ||||
|     url_or_none, | ||||
| ) | ||||
| from .keezmovies import KeezMoviesIE | ||||
| 
 | ||||
| 
 | ||||
| class Tube8IE(KeezMoviesIE):  # XXX: Do not subclass from concrete IE | ||||
| class Tube8IE(InfoExtractor): | ||||
|     _WORKING = False | ||||
|     _VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/(?P<id>\d+)' | ||||
|     _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?tube8\.com/embed/(?:[^/]+/)+\d+)'] | ||||
|     _TESTS = [{ | ||||
| @@ -30,6 +37,90 @@ class Tube8IE(KeezMoviesIE):  # XXX: Do not subclass from concrete IE | ||||
|         'only_matching': True, | ||||
|     }] | ||||
| 
 | ||||
|     def _extract_info(self, url, fatal=True): | ||||
|         mobj = self._match_valid_url(url) | ||||
|         video_id = mobj.group('id') | ||||
|         display_id = (mobj.group('display_id') | ||||
|                       if 'display_id' in mobj.groupdict() | ||||
|                       else None) or mobj.group('id') | ||||
| 
 | ||||
|         webpage = self._download_webpage( | ||||
|             url, display_id, headers={'Cookie': 'age_verified=1'}) | ||||
| 
 | ||||
|         formats = [] | ||||
|         format_urls = set() | ||||
| 
 | ||||
|         title = None | ||||
|         thumbnail = None | ||||
|         duration = None | ||||
|         encrypted = False | ||||
| 
 | ||||
|         def extract_format(format_url, height=None): | ||||
|             format_url = url_or_none(format_url) | ||||
|             if not format_url or not format_url.startswith(('http', '//')): | ||||
|                 return | ||||
|             if format_url in format_urls: | ||||
|                 return | ||||
|             format_urls.add(format_url) | ||||
|             tbr = int_or_none(self._search_regex( | ||||
|                 r'[/_](\d+)[kK][/_]', format_url, 'tbr', default=None)) | ||||
|             if not height: | ||||
|                 height = int_or_none(self._search_regex( | ||||
|                     r'[/_](\d+)[pP][/_]', format_url, 'height', default=None)) | ||||
|             if encrypted: | ||||
|                 format_url = aes_decrypt_text( | ||||
|                     video_url, title, 32).decode('utf-8') | ||||
|             formats.append({ | ||||
|                 'url': format_url, | ||||
|                 'format_id': format_field(height, None, '%dp'), | ||||
|                 'height': height, | ||||
|                 'tbr': tbr, | ||||
|             }) | ||||
| 
 | ||||
|         flashvars = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'flashvars\s*=\s*({.+?});', webpage, | ||||
|                 'flashvars', default='{}'), | ||||
|             display_id, fatal=False) | ||||
| 
 | ||||
|         if flashvars: | ||||
|             title = flashvars.get('video_title') | ||||
|             thumbnail = flashvars.get('image_url') | ||||
|             duration = int_or_none(flashvars.get('video_duration')) | ||||
|             encrypted = flashvars.get('encrypted') is True | ||||
|             for key, value in flashvars.items(): | ||||
|                 mobj = re.search(r'quality_(\d+)[pP]', key) | ||||
|                 if mobj: | ||||
|                     extract_format(value, int(mobj.group(1))) | ||||
|             video_url = flashvars.get('video_url') | ||||
|             if video_url and determine_ext(video_url, None): | ||||
|                 extract_format(video_url) | ||||
| 
 | ||||
|         video_url = self._html_search_regex( | ||||
|             r'flashvars\.video_url\s*=\s*(["\'])(?P<url>http.+?)\1', | ||||
|             webpage, 'video url', default=None, group='url') | ||||
|         if video_url: | ||||
|             extract_format(compat_urllib_parse_unquote(video_url)) | ||||
| 
 | ||||
|         if not formats: | ||||
|             if 'title="This video is no longer available"' in webpage: | ||||
|                 self.raise_no_formats( | ||||
|                     'Video %s is no longer available' % video_id, expected=True) | ||||
| 
 | ||||
|         if not title: | ||||
|             title = self._html_search_regex( | ||||
|                 r'<h1[^>]*>([^<]+)', webpage, 'title') | ||||
| 
 | ||||
|         return webpage, { | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'title': strip_or_none(title), | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'age_limit': 18, | ||||
|             'formats': formats, | ||||
|         } | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         webpage, info = self._extract_info(url) | ||||
| 
 | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 sepro
					sepro