Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2025-10-26 04:00:57 +00:00)
			
		
		
		
	[jamendo] Improve and extract more metadata (closes #11836)
This commit is contained in:
		
parent 15846398ca
commit 3cbecdd111
					
				| @@ -5,9 +5,27 @@ | |||||||
| 
 | 
 | ||||||
| from ..compat import compat_urlparse | from ..compat import compat_urlparse | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
|  | from ..utils import parse_duration | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class JamendoIE(InfoExtractor): | class JamendoBaseIE(InfoExtractor): | ||||||
|  |     def _extract_meta(self, webpage, fatal=True): | ||||||
|  |         title = self._og_search_title( | ||||||
|  |             webpage, default=None) or self._search_regex( | ||||||
|  |             r'<title>([^<]+)', webpage, | ||||||
|  |             'title', default=None) | ||||||
|  |         if title: | ||||||
|  |             title = self._search_regex( | ||||||
|  |                 r'(.+?)\s*\|\s*Jamendo Music', title, 'title', default=None) | ||||||
|  |         if not title: | ||||||
|  |             title = self._html_search_meta( | ||||||
|  |                 'name', webpage, 'title', fatal=fatal) | ||||||
|  |         mobj = re.search(r'(.+) - (.+)', title or '') | ||||||
|  |         artist, second = mobj.groups() if mobj else [None] * 2 | ||||||
|  |         return title, artist, second | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class JamendoIE(JamendoBaseIE): | ||||||
|     _VALID_URL = r'https?://(?:www\.)?jamendo\.com/track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+)' |     _VALID_URL = r'https?://(?:www\.)?jamendo\.com/track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+)' | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i', |         'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i', | ||||||
| @@ -17,6 +35,9 @@ class JamendoIE(InfoExtractor): | |||||||
|             'display_id': 'stories-from-emona-i', |             'display_id': 'stories-from-emona-i', | ||||||
|             'ext': 'flac', |             'ext': 'flac', | ||||||
|             'title': 'Maya Filipič - Stories from Emona I', |             'title': 'Maya Filipič - Stories from Emona I', | ||||||
|  |             'artist': 'Maya Filipič', | ||||||
|  |             'track': 'Stories from Emona I', | ||||||
|  |             'duration': 210, | ||||||
|             'thumbnail': r're:^https?://.*\.jpg' |             'thumbnail': r're:^https?://.*\.jpg' | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| @@ -28,7 +49,7 @@ def _real_extract(self, url): | |||||||
| 
 | 
 | ||||||
|         webpage = self._download_webpage(url, display_id) |         webpage = self._download_webpage(url, display_id) | ||||||
| 
 | 
 | ||||||
|         title = self._search_regex(r'<title>(.*?)\ \|\ Jamendo\ Music\ .*</title>', webpage, 'title') |         title, artist, track = self._extract_meta(webpage) | ||||||
| 
 | 
 | ||||||
|         formats = [{ |         formats = [{ | ||||||
|             'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294' |             'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294' | ||||||
| @@ -46,17 +67,23 @@ def _real_extract(self, url): | |||||||
| 
 | 
 | ||||||
|         thumbnail = self._html_search_meta( |         thumbnail = self._html_search_meta( | ||||||
|             'image', webpage, 'thumbnail', fatal=False) |             'image', webpage, 'thumbnail', fatal=False) | ||||||
|  |         duration = parse_duration(self._search_regex( | ||||||
|  |             r'<span[^>]+itemprop=["\']duration["\'][^>]+content=["\'](.+?)["\']', | ||||||
|  |             webpage, 'duration', fatal=False)) | ||||||
| 
 | 
 | ||||||
|         return { |         return { | ||||||
|             'id': track_id, |             'id': track_id, | ||||||
|             'display_id': display_id, |             'display_id': display_id, | ||||||
|             'thumbnail': thumbnail, |             'thumbnail': thumbnail, | ||||||
|             'title': title, |             'title': title, | ||||||
|  |             'duration': duration, | ||||||
|  |             'artist': artist, | ||||||
|  |             'track': track, | ||||||
|             'formats': formats |             'formats': formats | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class JamendoAlbumIE(InfoExtractor): | class JamendoAlbumIE(JamendoBaseIE): | ||||||
|     _VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)/(?P<display_id>[\w-]+)' |     _VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)/(?P<display_id>[\w-]+)' | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         'url': 'https://www.jamendo.com/album/121486/duck-on-cover', |         'url': 'https://www.jamendo.com/album/121486/duck-on-cover', | ||||||
| @@ -69,14 +96,18 @@ class JamendoAlbumIE(InfoExtractor): | |||||||
|             'info_dict': { |             'info_dict': { | ||||||
|                 'id': '1032333', |                 'id': '1032333', | ||||||
|                 'ext': 'flac', |                 'ext': 'flac', | ||||||
|                 'title': 'Shearer - Warmachine' |                 'title': 'Shearer - Warmachine', | ||||||
|  |                 'artist': 'Shearer', | ||||||
|  |                 'track': 'Warmachine', | ||||||
|             } |             } | ||||||
|         }, { |         }, { | ||||||
|             'md5': '1f358d7b2f98edfe90fd55dac0799d50', |             'md5': '1f358d7b2f98edfe90fd55dac0799d50', | ||||||
|             'info_dict': { |             'info_dict': { | ||||||
|                 'id': '1032330', |                 'id': '1032330', | ||||||
|                 'ext': 'flac', |                 'ext': 'flac', | ||||||
|                 'title': 'Shearer - Without Your Ghost' |                 'title': 'Shearer - Without Your Ghost', | ||||||
|  |                 'artist': 'Shearer', | ||||||
|  |                 'track': 'Without Your Ghost', | ||||||
|             } |             } | ||||||
|         }], |         }], | ||||||
|         'params': { |         'params': { | ||||||
| @@ -90,18 +121,18 @@ def _real_extract(self, url): | |||||||
| 
 | 
 | ||||||
|         webpage = self._download_webpage(url, mobj.group('display_id')) |         webpage = self._download_webpage(url, mobj.group('display_id')) | ||||||
| 
 | 
 | ||||||
|         title = self._search_regex(r'<title>(.*?)\ \|\ Jamendo\ Music\ .*</title>', webpage, 'title') |         title, artist, album = self._extract_meta(webpage, fatal=False) | ||||||
| 
 | 
 | ||||||
|         entries = [ |         entries = [{ | ||||||
|             self.url_result( |             '_type': 'url_transparent', | ||||||
|                 compat_urlparse.urljoin(url, m.group('path')), |             'url': compat_urlparse.urljoin(url, m.group('path')), | ||||||
|                 ie=JamendoIE.ie_key(), |             'ie_key': JamendoIE.ie_key(), | ||||||
|                 video_id=self._search_regex( |             'id': self._search_regex( | ||||||
|                     r'/track/(\d+)', m.group('path'), |                 r'/track/(\d+)', m.group('path'), 'track id', default=None), | ||||||
|                     'track id', default=None)) |             'artist': artist, | ||||||
|             for m in re.finditer( |             'album': album, | ||||||
|                 r'<a[^>]+href=(["\'])(?P<path>(?:(?!\1).)+)\1[^>]+class=["\'][^>]*js-trackrow-albumpage-link', |         } for m in re.finditer( | ||||||
|                 webpage) |             r'<a[^>]+href=(["\'])(?P<path>(?:(?!\1).)+)\1[^>]+class=["\'][^>]*js-trackrow-albumpage-link', | ||||||
|         ] |             webpage)] | ||||||
| 
 | 
 | ||||||
|         return self.playlist_result(entries, album_id, title) |         return self.playlist_result(entries, album_id, title) | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Sergey M․
						Sergey M․