mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[phoenix] Add new extractor (Fixes #4036)
This commit is contained in:
		| @@ -280,6 +280,7 @@ from .orf import ( | |||||||
| from .parliamentliveuk import ParliamentLiveUKIE | from .parliamentliveuk import ParliamentLiveUKIE | ||||||
| from .patreon import PatreonIE | from .patreon import PatreonIE | ||||||
| from .pbs import PBSIE | from .pbs import PBSIE | ||||||
|  | from .phoenix import PhoenixIE | ||||||
| from .photobucket import PhotobucketIE | from .photobucket import PhotobucketIE | ||||||
| from .planetaplay import PlanetaPlayIE | from .planetaplay import PlanetaPlayIE | ||||||
| from .played import PlayedIE | from .played import PlayedIE | ||||||
|   | |||||||
							
								
								
									
										31
									
								
								youtube_dl/extractor/phoenix.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										31
									
								
								youtube_dl/extractor/phoenix.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,31 @@ | |||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | from .common import InfoExtractor | ||||||
|  | from .zdf import extract_from_xml_url | ||||||
|  |  | ||||||
|  |  | ||||||
class PhoenixIE(InfoExtractor):
    """Extractor for phoenix.de content pages.

    The page is only used to discover the player's internal video ID; the
    actual metadata and formats come from an XML document that is parsed by
    extract_from_xml_url (shared with the ZDF extractor).
    """

    _VALID_URL = r'https?://(?:www\.)?phoenix\.de/content/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.phoenix.de/content/884301',
        'md5': 'ed249f045256150c92e72dbb70eadec6',
        'info_dict': {
            'id': '884301',
            'ext': 'mp4',
            'title': 'Michael Krons mit Hans-Werner Sinn',
            'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr',
            'upload_date': '20141025',
            'uploader': 'Im Dialog',
        }
    }

    def _real_extract(self, url):
        # The ID from the page URL is what gets reported as the video ID.
        content_id = self._match_id(url)
        page = self._download_webpage(url, content_id)

        # The XML endpoint is keyed by a different ID, which is taken from
        # the id attribute of the player <div> in the page markup.
        player_id = self._search_regex(
            r'<div class="phx_vod" id="phx_vod_([0-9]+)"',
            page, 'internal video ID')

        details_url = (
            'http://www.phoenix.de/php/zdfplayer-v1.3/data/beitragsDetails.php?ak=web&id=%s'
            % player_id)
        return extract_from_xml_url(self, content_id, details_url)
| @@ -10,6 +10,82 @@ from ..utils import ( | |||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
def extract_from_xml_url(ie, video_id, xml_url):
    """Download a beitragsDetails XML document and turn it into an info dict.

    Module-level so that more than one extractor can use it (the ZDF and
    phoenix.de extractors both call it).

    ie       -- the calling extractor; its _download_xml and _sort_formats
                methods are used
    video_id -- ID stored in the result and passed to _download_xml
    xml_url  -- URL of the XML document

    Returns a dict with the keys id, title, description, duration, uploader,
    uploader_id, upload_date and formats.

    The title node is required (a missing one still raises). All other
    metadata nodes are optional and yield None when absent. Format nodes
    without a URL, with a metafilegenerator URL, or with a basetype that does
    not have the expected six underscore-separated fields are skipped instead
    of aborting the whole extraction.
    """
    doc = ie._download_xml(
        xml_url, video_id,
        note='Downloading video info',
        errnote='Failed to download video info')

    def node_text(parent, path):
        # Text of the first node matching path; None if there is no such node
        node = parent.find(path)
        return None if node is None else node.text

    def per_thousand(parent, path):
        # Bitrates are divided by 1000 before being stored, as before
        # (presumably bit/s -> kbit/s; the unit is not visible from here).
        text = node_text(parent, path)
        return int(text) // 1000 if text else None

    title = doc.find('.//information/title').text
    description = node_text(doc, './/information/detail')
    duration = int_or_none(node_text(doc, './/details/lengthSec'))
    uploader = node_text(doc, './/details/originChannelTitle')
    uploader_id = node_text(doc, './/details/originChannelId')
    airtime = node_text(doc, './/details/airtime')
    upload_date = None if airtime is None else unified_strdate(airtime)

    # basetype looks like vcodec_acodec_container_proto_index_indexproto
    basetype_re = re.compile(r'''(?x)
        (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
        (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
    ''')

    formats = []
    for fnode in doc.findall('.//formitaeten/formitaet'):
        video_url = node_text(fnode, 'url')
        # metafilegenerator URLs mark formats that are treated as unavailable
        if not video_url or 'http://www.metafilegenerator' in video_url:
            continue

        basetype = fnode.attrib.get('basetype')
        format_m = None if basetype is None else basetype_re.match(basetype)
        if format_m is None:
            # Codecs, container and protocol cannot be determined
            continue

        quality = node_text(fnode, './quality')
        format_id = (basetype + '-' + quality) if quality else basetype

        formats.append({
            'format_id': format_id,
            'url': video_url,
            'ext': format_m.group('container'),
            'acodec': format_m.group('acodec'),
            'vcodec': format_m.group('vcodec'),
            'abr': per_thousand(fnode, './audioBitrate'),
            'vbr': per_thousand(fnode, './videoBitrate'),
            'width': int_or_none(node_text(fnode, './width')),
            'height': int_or_none(node_text(fnode, './height')),
            'filesize': int_or_none(node_text(fnode, './filesize')),
            # The next two keys carry no information any more; they are kept
            # so that the format dicts have the same keys as before.
            'format_note': None,
            'protocol': format_m.group('proto').lower(),
            '_available': True,
        })

    ie._sort_formats(formats)

    return {
        'id': video_id,
        'title': title,
        'description': description,
        'duration': duration,
        'uploader': uploader,
        'uploader_id': uploader_id,
        'upload_date': upload_date,
        'formats': formats,
    }
|  |  | ||||||
|  |  | ||||||
| class ZDFIE(InfoExtractor): | class ZDFIE(InfoExtractor): | ||||||
|     _VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?' |     _VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?' | ||||||
|  |  | ||||||
| @@ -32,77 +108,4 @@ class ZDFIE(InfoExtractor): | |||||||
|         video_id = self._match_id(url) |         video_id = self._match_id(url) | ||||||
|  |  | ||||||
|         xml_url = 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id |         xml_url = 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id | ||||||
|         doc = self._download_xml( |         return extract_from_xml_url(self, video_id, xml_url) | ||||||
|             xml_url, video_id, |  | ||||||
|             note='Downloading video info', |  | ||||||
|             errnote='Failed to download video info') |  | ||||||
|  |  | ||||||
|         title = doc.find('.//information/title').text |  | ||||||
|         description = doc.find('.//information/detail').text |  | ||||||
|         duration = int(doc.find('.//details/lengthSec').text) |  | ||||||
|         uploader_node = doc.find('.//details/originChannelTitle') |  | ||||||
|         uploader = None if uploader_node is None else uploader_node.text |  | ||||||
|         uploader_id_node = doc.find('.//details/originChannelId') |  | ||||||
|         uploader_id = None if uploader_id_node is None else uploader_id_node.text |  | ||||||
|         upload_date = unified_strdate(doc.find('.//details/airtime').text) |  | ||||||
|  |  | ||||||
|         def xml_to_format(fnode): |  | ||||||
|             video_url = fnode.find('url').text |  | ||||||
|             is_available = 'http://www.metafilegenerator' not in video_url |  | ||||||
|  |  | ||||||
|             format_id = fnode.attrib['basetype'] |  | ||||||
|             format_m = re.match(r'''(?x) |  | ||||||
|                 (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_ |  | ||||||
|                 (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+) |  | ||||||
|             ''', format_id) |  | ||||||
|  |  | ||||||
|             ext = format_m.group('container') |  | ||||||
|             proto = format_m.group('proto').lower() |  | ||||||
|  |  | ||||||
|             quality = fnode.find('./quality').text |  | ||||||
|             abr = int(fnode.find('./audioBitrate').text) // 1000 |  | ||||||
|             vbr_node = fnode.find('./videoBitrate') |  | ||||||
|             vbr = None if vbr_node is None else int(vbr_node.text) // 1000 |  | ||||||
|  |  | ||||||
|             width_node = fnode.find('./width') |  | ||||||
|             width = None if width_node is None else int_or_none(width_node.text) |  | ||||||
|             height_node = fnode.find('./height') |  | ||||||
|             height = None if height_node is None else int_or_none(height_node.text) |  | ||||||
|  |  | ||||||
|             format_note = '' |  | ||||||
|             if not format_note: |  | ||||||
|                 format_note = None |  | ||||||
|  |  | ||||||
|             return { |  | ||||||
|                 'format_id': format_id + '-' + quality, |  | ||||||
|                 'url': video_url, |  | ||||||
|                 'ext': ext, |  | ||||||
|                 'acodec': format_m.group('acodec'), |  | ||||||
|                 'vcodec': format_m.group('vcodec'), |  | ||||||
|                 'abr': abr, |  | ||||||
|                 'vbr': vbr, |  | ||||||
|                 'width': width, |  | ||||||
|                 'height': height, |  | ||||||
|                 'filesize': int_or_none(fnode.find('./filesize').text), |  | ||||||
|                 'format_note': format_note, |  | ||||||
|                 'protocol': proto, |  | ||||||
|                 '_available': is_available, |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|         format_nodes = doc.findall('.//formitaeten/formitaet') |  | ||||||
|         formats = list(filter( |  | ||||||
|             lambda f: f['_available'], |  | ||||||
|             map(xml_to_format, format_nodes))) |  | ||||||
|  |  | ||||||
|         self._sort_formats(formats) |  | ||||||
|  |  | ||||||
|         return { |  | ||||||
|             'id': video_id, |  | ||||||
|             'title': title, |  | ||||||
|             'description': description, |  | ||||||
|             'duration': duration, |  | ||||||
|             'uploader': uploader, |  | ||||||
|             'uploader_id': uploader_id, |  | ||||||
|             'upload_date': upload_date, |  | ||||||
|             'formats': formats, |  | ||||||
|         } |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister