mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[vshare] Improve extraction, fix formats sorting and carry long lines
This commit is contained in:
		| @@ -26,13 +26,20 @@ class VShareIE(InfoExtractor): | |||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|  |     @staticmethod | ||||||
|  |     def _extract_urls(webpage): | ||||||
|  |         return re.findall( | ||||||
|  |             r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)', | ||||||
|  |             webpage) | ||||||
|  |  | ||||||
|     def _extract_packed(self, webpage): |     def _extract_packed(self, webpage): | ||||||
|         packed = self._search_regex(r'(eval\(function.+)', webpage, 'packed code') |         packed = self._search_regex( | ||||||
|  |             r'(eval\(function.+)', webpage, 'packed code') | ||||||
|         unpacked = decode_packed_codes(packed) |         unpacked = decode_packed_codes(packed) | ||||||
|         digits = self._search_regex(r'\[((?:\d+,?)+)\]', unpacked, 'digits') |         digits = self._search_regex(r'\[((?:\d+,?)+)\]', unpacked, 'digits') | ||||||
|         digits = digits.split(',') |         digits = [int(digit) for digit in digits.split(',')] | ||||||
|         digits = [int(digit) for digit in digits] |         key_digit = self._search_regex( | ||||||
|         key_digit = self._search_regex(r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit') |             r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit') | ||||||
|         chars = [compat_chr(d - int(key_digit)) for d in digits] |         chars = [compat_chr(d - int(key_digit)) for d in digits] | ||||||
|         return ''.join(chars) |         return ''.join(chars) | ||||||
|  |  | ||||||
| @@ -40,9 +47,11 @@ class VShareIE(InfoExtractor): | |||||||
|         video_id = self._match_id(url) |         video_id = self._match_id(url) | ||||||
|  |  | ||||||
|         webpage = self._download_webpage( |         webpage = self._download_webpage( | ||||||
|             'https://vshare.io/v/%s/width-650/height-430/1' % video_id, video_id) |             'https://vshare.io/v/%s/width-650/height-430/1' % video_id, | ||||||
|  |             video_id) | ||||||
|  |  | ||||||
|         title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title') |         title = self._html_search_regex( | ||||||
|  |             r'<title>([^<]+)</title>', webpage, 'title') | ||||||
|         title = title.split(' - ')[0] |         title = title.split(' - ')[0] | ||||||
|  |  | ||||||
|         error = self._html_search_regex( |         error = self._html_search_regex( | ||||||
| @@ -51,17 +60,15 @@ class VShareIE(InfoExtractor): | |||||||
|         if error: |         if error: | ||||||
|             raise ExtractorError(error, expected=True) |             raise ExtractorError(error, expected=True) | ||||||
|  |  | ||||||
|         unpacked = self._extract_packed(webpage) |         info = self._parse_html5_media_entries( | ||||||
|         video_urls = re.findall(r'<source src="([^"]+)', unpacked) |             url, '<video>%s</video>' % self._extract_packed(webpage), | ||||||
|         formats = [{'url': video_url} for video_url in video_urls] |             video_id)[0] | ||||||
|         return { |  | ||||||
|  |         self._sort_formats(info['formats']) | ||||||
|  |  | ||||||
|  |         info.update({ | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'title': title, |             'title': title, | ||||||
|             'formats': formats, |         }) | ||||||
|         } |  | ||||||
|  |  | ||||||
|     @staticmethod |         return info | ||||||
|     def _extract_urls(webpage): |  | ||||||
|         return re.findall( |  | ||||||
|             r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)', |  | ||||||
|             webpage) |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Sergey M․
					Sergey M․