mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	 Kacper Michajłow
					Kacper Michajłow
				
			
				
					committed by
					
						 Sergey M․
						Sergey M․
					
				
			
			
				
	
			
			
			 Sergey M․
						Sergey M․
					
				
			
						parent
						
							3d2729514f
						
					
				
				
					commit
					577281b0c6
				
			| @@ -5,14 +5,16 @@ import re | |||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     decode_packed_codes, |  | ||||||
|     ExtractorError, |     ExtractorError, | ||||||
|     parse_duration |     float_or_none, | ||||||
|  |     int_or_none, | ||||||
|  |     parse_duration, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| class CDAIE(InfoExtractor): | class CDAIE(InfoExtractor): | ||||||
|     _VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)' |     _VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)' | ||||||
|  |     _BASE_URL = 'http://www.cda.pl/' | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'http://www.cda.pl/video/5749950c', |         'url': 'http://www.cda.pl/video/5749950c', | ||||||
|         'md5': '6f844bf51b15f31fae165365707ae970', |         'md5': '6f844bf51b15f31fae165365707ae970', | ||||||
| @@ -21,6 +23,9 @@ class CDAIE(InfoExtractor): | |||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'height': 720, |             'height': 720, | ||||||
|             'title': 'Oto dlaczego przed zakrętem należy zwolnić.', |             'title': 'Oto dlaczego przed zakrętem należy zwolnić.', | ||||||
|  |             'description': 'md5:269ccd135d550da90d1662651fcb9772', | ||||||
|  |             'thumbnail': 're:^https?://.*\.jpg$', | ||||||
|  |             'average_rating': float, | ||||||
|             'duration': 39 |             'duration': 39 | ||||||
|         } |         } | ||||||
|     }, { |     }, { | ||||||
| @@ -30,6 +35,11 @@ class CDAIE(InfoExtractor): | |||||||
|             'id': '57413289', |             'id': '57413289', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': 'Lądowanie na lotnisku na Maderze', |             'title': 'Lądowanie na lotnisku na Maderze', | ||||||
|  |             'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a', | ||||||
|  |             'thumbnail': 're:^https?://.*\.jpg$', | ||||||
|  |             'uploader': 'crash404', | ||||||
|  |             'view_count': int, | ||||||
|  |             'average_rating': float, | ||||||
|             'duration': 137 |             'duration': 137 | ||||||
|         } |         } | ||||||
|     }, { |     }, { | ||||||
| @@ -39,31 +49,55 @@ class CDAIE(InfoExtractor): | |||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         video_id = self._match_id(url) |         video_id = self._match_id(url) | ||||||
|         webpage = self._download_webpage('http://ebd.cda.pl/0x0/' + video_id, video_id) |         self._set_cookie('cda.pl', 'cda.player', 'html5') | ||||||
|  |         webpage = self._download_webpage( | ||||||
|  |             self._BASE_URL + '/video/' + video_id, video_id) | ||||||
|  |  | ||||||
|         if 'Ten film jest dostępny dla użytkowników premium' in webpage: |         if 'Ten film jest dostępny dla użytkowników premium' in webpage: | ||||||
|             raise ExtractorError('This video is only available for premium users.', expected=True) |             raise ExtractorError('This video is only available for premium users.', expected=True) | ||||||
|  |  | ||||||
|         title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title') |  | ||||||
|  |  | ||||||
|         formats = [] |         formats = [] | ||||||
|  |  | ||||||
|  |         uploader = self._search_regex(r'''(?x) | ||||||
|  |             <(span|meta)[^>]+itemprop=(["\'])author\2[^>]*> | ||||||
|  |             (?:<\1[^>]*>[^<]*</\1>|(?!</\1>)(?:.|\n))*? | ||||||
|  |             <(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3> | ||||||
|  |         ''', webpage, 'uploader', default=None, group='uploader') | ||||||
|  |         view_count = self._search_regex( | ||||||
|  |             r'Odsłony:(?:\s| )*([0-9]+)', webpage, | ||||||
|  |             'view_count', default=None) | ||||||
|  |         average_rating = self._search_regex( | ||||||
|  |             r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)', | ||||||
|  |             webpage, 'rating', fatal=False, group='rating_value') | ||||||
|  |  | ||||||
|         info_dict = { |         info_dict = { | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'title': title, |             'title': self._og_search_title(webpage), | ||||||
|  |             'description': self._og_search_description(webpage), | ||||||
|  |             'uploader': uploader, | ||||||
|  |             'view_count': int_or_none(view_count), | ||||||
|  |             'average_rating': float_or_none(average_rating), | ||||||
|  |             'thumbnail': self._og_search_thumbnail(webpage), | ||||||
|             'formats': formats, |             'formats': formats, | ||||||
|             'duration': None, |             'duration': None, | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         def extract_format(page, version): |         def extract_format(page, version): | ||||||
|             unpacked = decode_packed_codes(page) |             json_str = self._search_regex( | ||||||
|             format_url = self._search_regex( |                 r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page, | ||||||
|                 r"(?:file|url)\s*:\s*(\\?[\"'])(?P<url>http.+?)\1", unpacked, |                 '%s player_json' % version, fatal=False, group='player_data') | ||||||
|                 '%s url' % version, fatal=False, group='url') |             if not json_str: | ||||||
|             if not format_url: |                 return | ||||||
|  |             player_data = self._parse_json( | ||||||
|  |                 json_str, '%s player_data' % version, fatal=False) | ||||||
|  |             if not player_data: | ||||||
|  |                 return | ||||||
|  |             video = player_data.get('video') | ||||||
|  |             if not video or 'file' not in video: | ||||||
|  |                 self.report_warning('Unable to extract %s version information' % version) | ||||||
|                 return |                 return | ||||||
|             f = { |             f = { | ||||||
|                 'url': format_url, |                 'url': video['file'], | ||||||
|             } |             } | ||||||
|             m = re.search( |             m = re.search( | ||||||
|                 r'<a[^>]+data-quality="(?P<format_id>[^"]+)"[^>]+href="[^"]+"[^>]+class="[^"]*quality-btn-active[^"]*">(?P<height>[0-9]+)p', |                 r'<a[^>]+data-quality="(?P<format_id>[^"]+)"[^>]+href="[^"]+"[^>]+class="[^"]*quality-btn-active[^"]*">(?P<height>[0-9]+)p', | ||||||
| @@ -75,9 +109,7 @@ class CDAIE(InfoExtractor): | |||||||
|                 }) |                 }) | ||||||
|             info_dict['formats'].append(f) |             info_dict['formats'].append(f) | ||||||
|             if not info_dict['duration']: |             if not info_dict['duration']: | ||||||
|                 info_dict['duration'] = parse_duration(self._search_regex( |                 info_dict['duration'] = parse_duration(video.get('duration')) | ||||||
|                     r"duration\s*:\s*(\\?[\"'])(?P<duration>.+?)\1", |  | ||||||
|                     unpacked, 'duration', fatal=False, group='duration')) |  | ||||||
|  |  | ||||||
|         extract_format(webpage, 'default') |         extract_format(webpage, 'default') | ||||||
|  |  | ||||||
| @@ -85,7 +117,8 @@ class CDAIE(InfoExtractor): | |||||||
|                 r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)', |                 r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)', | ||||||
|                 webpage): |                 webpage): | ||||||
|             webpage = self._download_webpage( |             webpage = self._download_webpage( | ||||||
|                 href, video_id, 'Downloading %s version information' % resolution, fatal=False) |                 self._BASE_URL + href, video_id, | ||||||
|  |                 'Downloading %s version information' % resolution, fatal=False) | ||||||
|             if not webpage: |             if not webpage: | ||||||
|                 # Manually report warning because empty page is returned when |                 # Manually report warning because empty page is returned when | ||||||
|                 # invalid version is requested. |                 # invalid version is requested. | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user