Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2025-10-31 14:45:14 +00:00)
			
		
		
		
	[heise] Fix description, thumbnail and format ID
This commit is contained in:
		| @@ -404,7 +404,7 @@ class InfoExtractor(object): | ||||
|             video_info['title'] = playlist_title | ||||
|         return video_info | ||||
|  | ||||
|     def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0): | ||||
|     def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None): | ||||
|         """ | ||||
|         Perform a regex search on the given string, using a single or a list of | ||||
|         patterns returning the first matching group. | ||||
| @@ -425,8 +425,11 @@ class InfoExtractor(object): | ||||
|             _name = name | ||||
|  | ||||
|         if mobj: | ||||
|             # return the first matching group | ||||
|             return next(g for g in mobj.groups() if g is not None) | ||||
|             if group is None: | ||||
|                 # return the first matching group | ||||
|                 return next(g for g in mobj.groups() if g is not None) | ||||
|             else: | ||||
|                 return mobj.group(group) | ||||
|         elif default is not _NO_DEFAULT: | ||||
|             return default | ||||
|         elif fatal: | ||||
| @@ -436,11 +439,11 @@ class InfoExtractor(object): | ||||
|                 'please report this issue on http://yt-dl.org/bug' % _name) | ||||
|             return None | ||||
|  | ||||
|     def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0): | ||||
|     def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None): | ||||
|         """ | ||||
|         Like _search_regex, but strips HTML tags and unescapes entities. | ||||
|         """ | ||||
|         res = self._search_regex(pattern, string, name, default, fatal, flags) | ||||
|         res = self._search_regex(pattern, string, name, default, fatal, flags, group) | ||||
|         if res: | ||||
|             return clean_html(res).strip() | ||||
|         else: | ||||
| @@ -534,9 +537,9 @@ class InfoExtractor(object): | ||||
|             display_name = name | ||||
|         return self._html_search_regex( | ||||
|             r'''(?ix)<meta | ||||
|                     (?=[^>]+(?:itemprop|name|property)=["\']?%s["\']?) | ||||
|                     [^>]+content=["\']([^"\']+)["\']''' % re.escape(name), | ||||
|             html, display_name, fatal=fatal, **kwargs) | ||||
|                     (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1) | ||||
|                     [^>]+content=(["\'])(?P<content>.*?)\1''' % re.escape(name), | ||||
|             html, display_name, fatal=fatal, group='content', **kwargs) | ||||
|  | ||||
|     def _dc_search_uploader(self, html): | ||||
|         return self._html_search_meta('dc.creator', html, 'uploader') | ||||
|   | ||||
| @@ -3,7 +3,7 @@ from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     get_meta_content, | ||||
|     determine_ext, | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
| ) | ||||
| @@ -25,11 +25,11 @@ class HeiseIE(InfoExtractor): | ||||
|             'title': ( | ||||
|                 "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone" | ||||
|             ), | ||||
|             'format_id': 'mp4_720', | ||||
|             'format_id': 'mp4_720p', | ||||
|             'timestamp': 1411812600, | ||||
|             'upload_date': '20140927', | ||||
|             'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.', | ||||
|             'thumbnail': 're:https?://.*\.jpg$', | ||||
|             'thumbnail': 're:^https?://.*\.jpe?g$', | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -49,11 +49,12 @@ class HeiseIE(InfoExtractor): | ||||
|         info = { | ||||
|             'id': video_id, | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'timestamp': parse_iso8601(get_meta_content('date', webpage)), | ||||
|             'timestamp': parse_iso8601( | ||||
|                 self._html_search_meta('date', webpage)), | ||||
|             'description': self._og_search_description(webpage), | ||||
|         } | ||||
|  | ||||
|         title = get_meta_content('fulltitle', webpage) | ||||
|         title = self._html_search_meta('fulltitle', webpage) | ||||
|         if title: | ||||
|             info['title'] = title | ||||
|         else: | ||||
| @@ -64,9 +65,12 @@ class HeiseIE(InfoExtractor): | ||||
|             label = source_node.attrib['label'] | ||||
|             height = int_or_none(self._search_regex( | ||||
|                 r'^(.*?_)?([0-9]+)p$', label, 'height', default=None)) | ||||
|             video_url = source_node.attrib['file'] | ||||
|             ext = determine_ext(video_url, '') | ||||
|             formats.append({ | ||||
|                 'url': source_node.attrib['file'], | ||||
|                 'url': video_url, | ||||
|                 'format_note': label, | ||||
|                 'format_id': '%s_%s' % (ext, label), | ||||
|                 'height': height, | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister