Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2025-10-31 14:45:14 +00:00)
			
		
		
		
	[heise] Fix description, thumbnail and format ID
This commit is contained in:
		| @@ -404,7 +404,7 @@ class InfoExtractor(object): | |||||||
|             video_info['title'] = playlist_title |             video_info['title'] = playlist_title | ||||||
|         return video_info |         return video_info | ||||||
|  |  | ||||||
|     def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0): |     def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None): | ||||||
|         """ |         """ | ||||||
|         Perform a regex search on the given string, using a single or a list of |         Perform a regex search on the given string, using a single or a list of | ||||||
|         patterns returning the first matching group. |         patterns returning the first matching group. | ||||||
| @@ -425,8 +425,11 @@ class InfoExtractor(object): | |||||||
|             _name = name |             _name = name | ||||||
|  |  | ||||||
|         if mobj: |         if mobj: | ||||||
|             # return the first matching group |             if group is None: | ||||||
|             return next(g for g in mobj.groups() if g is not None) |                 # return the first matching group | ||||||
|  |                 return next(g for g in mobj.groups() if g is not None) | ||||||
|  |             else: | ||||||
|  |                 return mobj.group(group) | ||||||
|         elif default is not _NO_DEFAULT: |         elif default is not _NO_DEFAULT: | ||||||
|             return default |             return default | ||||||
|         elif fatal: |         elif fatal: | ||||||
| @@ -436,11 +439,11 @@ class InfoExtractor(object): | |||||||
|                 'please report this issue on http://yt-dl.org/bug' % _name) |                 'please report this issue on http://yt-dl.org/bug' % _name) | ||||||
|             return None |             return None | ||||||
|  |  | ||||||
|     def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0): |     def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None): | ||||||
|         """ |         """ | ||||||
|         Like _search_regex, but strips HTML tags and unescapes entities. |         Like _search_regex, but strips HTML tags and unescapes entities. | ||||||
|         """ |         """ | ||||||
|         res = self._search_regex(pattern, string, name, default, fatal, flags) |         res = self._search_regex(pattern, string, name, default, fatal, flags, group) | ||||||
|         if res: |         if res: | ||||||
|             return clean_html(res).strip() |             return clean_html(res).strip() | ||||||
|         else: |         else: | ||||||
| @@ -534,9 +537,9 @@ class InfoExtractor(object): | |||||||
|             display_name = name |             display_name = name | ||||||
|         return self._html_search_regex( |         return self._html_search_regex( | ||||||
|             r'''(?ix)<meta |             r'''(?ix)<meta | ||||||
|                     (?=[^>]+(?:itemprop|name|property)=["\']?%s["\']?) |                     (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1) | ||||||
|                     [^>]+content=["\']([^"\']+)["\']''' % re.escape(name), |                     [^>]+content=(["\'])(?P<content>.*?)\1''' % re.escape(name), | ||||||
|             html, display_name, fatal=fatal, **kwargs) |             html, display_name, fatal=fatal, group='content', **kwargs) | ||||||
|  |  | ||||||
|     def _dc_search_uploader(self, html): |     def _dc_search_uploader(self, html): | ||||||
|         return self._html_search_meta('dc.creator', html, 'uploader') |         return self._html_search_meta('dc.creator', html, 'uploader') | ||||||
|   | |||||||
| @@ -3,7 +3,7 @@ from __future__ import unicode_literals | |||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     get_meta_content, |     determine_ext, | ||||||
|     int_or_none, |     int_or_none, | ||||||
|     parse_iso8601, |     parse_iso8601, | ||||||
| ) | ) | ||||||
| @@ -25,11 +25,11 @@ class HeiseIE(InfoExtractor): | |||||||
|             'title': ( |             'title': ( | ||||||
|                 "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone" |                 "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone" | ||||||
|             ), |             ), | ||||||
|             'format_id': 'mp4_720', |             'format_id': 'mp4_720p', | ||||||
|             'timestamp': 1411812600, |             'timestamp': 1411812600, | ||||||
|             'upload_date': '20140927', |             'upload_date': '20140927', | ||||||
|             'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.', |             'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.', | ||||||
|             'thumbnail': 're:https?://.*\.jpg$', |             'thumbnail': 're:^https?://.*\.jpe?g$', | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -49,11 +49,12 @@ class HeiseIE(InfoExtractor): | |||||||
|         info = { |         info = { | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'thumbnail': self._og_search_thumbnail(webpage), |             'thumbnail': self._og_search_thumbnail(webpage), | ||||||
|             'timestamp': parse_iso8601(get_meta_content('date', webpage)), |             'timestamp': parse_iso8601( | ||||||
|  |                 self._html_search_meta('date', webpage)), | ||||||
|             'description': self._og_search_description(webpage), |             'description': self._og_search_description(webpage), | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         title = get_meta_content('fulltitle', webpage) |         title = self._html_search_meta('fulltitle', webpage) | ||||||
|         if title: |         if title: | ||||||
|             info['title'] = title |             info['title'] = title | ||||||
|         else: |         else: | ||||||
| @@ -64,9 +65,12 @@ class HeiseIE(InfoExtractor): | |||||||
|             label = source_node.attrib['label'] |             label = source_node.attrib['label'] | ||||||
|             height = int_or_none(self._search_regex( |             height = int_or_none(self._search_regex( | ||||||
|                 r'^(.*?_)?([0-9]+)p$', label, 'height', default=None)) |                 r'^(.*?_)?([0-9]+)p$', label, 'height', default=None)) | ||||||
|  |             video_url = source_node.attrib['file'] | ||||||
|  |             ext = determine_ext(video_url, '') | ||||||
|             formats.append({ |             formats.append({ | ||||||
|                 'url': source_node.attrib['file'], |                 'url': video_url, | ||||||
|                 'format_note': label, |                 'format_note': label, | ||||||
|  |                 'format_id': '%s_%s' % (ext, label), | ||||||
|                 'height': height, |                 'height': height, | ||||||
|             }) |             }) | ||||||
|         self._sort_formats(formats) |         self._sort_formats(formats) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister