mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-30 22:25:19 +00:00 
			
		
		
		
	[mailru:music] Add extractor (closes #15618)
This commit is contained in:
		| @@ -1,12 +1,17 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import itertools | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_urllib_parse_unquote | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     parse_duration, | ||||
|     remove_end, | ||||
|     try_get, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -157,3 +162,153 @@ class MailRuIE(InfoExtractor): | ||||
|             'view_count': view_count, | ||||
|             'formats': formats, | ||||
|         } | ||||
|  | ||||
|  | ||||
class MailRuMusicSearchBaseIE(InfoExtractor):
    """Common helpers shared by the Mail.Ru music extractors."""

    def _search(self, query, url, audio_id, limit=100, offset=0):
        """Run a ``music.search`` ajax call and return its first dict element.

        ``query`` is sent as ``arg_query``, ``url`` as the ``Referer`` header
        and ``audio_id`` is passed to ``_download_json`` as the id of the item
        being processed. ``limit`` and ``offset`` are sent both as top-level
        arguments and inside the JSON-encoded ``arg_search_params``.

        Raises StopIteration when the response contains no dict element.
        """
        page_num = offset // limit + 1
        search_params = json.dumps({
            'music': {
                'limit': limit,
                'offset': offset,
            },
        })
        request_headers = {
            'Referer': url,
            'X-Requested-With': 'XMLHttpRequest',
        }
        request_query = {
            'xemail': '',
            'ajax_call': '1',
            'func_name': 'music.search',
            'mna': '',
            'mnb': '',
            'arg_query': query,
            'arg_extended': '1',
            'arg_search_params': search_params,
            'arg_limit': limit,
            'arg_offset': offset,
        }
        response = self._download_json(
            'https://my.mail.ru/cgi-bin/my/ajax', audio_id,
            'Downloading songs JSON page %d' % page_num,
            headers=request_headers, query=request_query)
        # The response is iterated and the first dict found is the payload.
        return next(item for item in response if isinstance(item, dict))

    @staticmethod
    def _extract_track(t, fatal=True):
        """Build an info dict from a single track dict ``t``.

        With ``fatal=True`` the ``URL`` and ``File`` keys are read with
        ``t[...]`` (so a missing key raises KeyError); with ``fatal=False``
        they are read with ``t.get(...)``. In both modes ``None`` is returned
        when either value is falsy.
        """
        def _required(key):
            return t[key] if fatal else t.get(key)

        audio_url = _required('URL')
        if not audio_url:
            return None

        audio_id = _required('File')
        if not audio_id:
            return None

        # Prefer the numeric duration; fall back to parsing a string form.
        duration = int_or_none(t.get('DurationInSeconds'))
        if not duration:
            duration = parse_duration(
                t.get('Duration') or t.get('DurationStr'))

        track = t.get('Name') or t.get('Name_Text_HTML')
        artist = t.get('Author') or t.get('Author_Text_HTML')

        # Title is "artist - track" when both exist, then track alone,
        # and finally the audio id when no track name is available.
        if not track:
            title = audio_id
        elif artist:
            title = '%s - %s' % (artist, track)
        else:
            title = track

        return {
            'extractor_key': MailRuMusicIE.ie_key(),
            'id': audio_id,
            'title': title,
            'thumbnail': t.get('AlbumCoverURL') or t.get('FiledAlbumCover'),
            'uploader': t.get('OwnerName') or t.get('OwnerName_Text_HTML'),
            'uploader_id': t.get('UploaderID'),
            'duration': duration,
            'view_count': int_or_none(
                t.get('PlayCount') or t.get('PlayCount_hr')),
            'vcodec': 'none',
            'abr': int_or_none(t.get('BitRate')),
            'track': track,
            'artist': artist,
            'album': t.get('Album'),
            'url': audio_url,
        }
|  | ||||
|  | ||||
class MailRuMusicIE(MailRuMusicSearchBaseIE):
    """Extractor for a single song page under my.mail.ru/music/songs/."""

    IE_NAME = 'mailru:music'
    IE_DESC = 'Музыка@Mail.Ru'
    _VALID_URL = r'https?://my\.mail\.ru/music/songs/[^/?#&]+-(?P<id>[\da-f]+)'
    _TESTS = [{
        'url': 'https://my.mail.ru/music/songs/%D0%BC8%D0%BB8%D1%82%D1%85-l-a-h-luciferian-aesthetics-of-herrschaft-single-2017-4e31f7125d0dfaef505d947642366893',
        'md5': '0f8c22ef8c5d665b13ac709e63025610',
        'info_dict': {
            'id': '4e31f7125d0dfaef505d947642366893',
            'ext': 'mp3',
            'title': 'L.A.H. (Luciferian Aesthetics of Herrschaft) single, 2017 - М8Л8ТХ',
            'uploader': 'Игорь Мудрый',
            'uploader_id': '1459196328',
            'duration': 280,
            'view_count': int,
            'vcodec': 'none',
            'abr': 320,
            'track': 'L.A.H. (Luciferian Aesthetics of Herrschaft) single, 2017',
            'artist': 'М8Л8ТХ',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for the song whose id is embedded in ``url``.

        The page's og:title is used as the search query; the search result
        whose ``File`` equals the id from the URL is converted to an info
        dict, and its title is replaced with the og:title.

        Raises StopIteration when no search result matches the id.
        """
        audio_id = self._match_id(url)

        page = self._download_webpage(url, audio_id)
        og_title = self._og_search_title(page)

        found = self._search(og_title, url, audio_id)
        track_data = next(
            item for item in found['MusicData']
            if item.get('File') == audio_id)

        result = self._extract_track(track_data)
        # The page title takes precedence over the title built from the track.
        result['title'] = og_title
        return result
|  | ||||
|  | ||||
class MailRuMusicSearchIE(MailRuMusicSearchBaseIE):
    """Extractor for search result pages under my.mail.ru/music/search/."""

    IE_NAME = 'mailru:music:search'
    IE_DESC = 'Музыка@Mail.Ru'
    _VALID_URL = r'https?://my\.mail\.ru/music/search/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://my.mail.ru/music/search/black%20shadow',
        'info_dict': {
            'id': 'black shadow',
        },
        'playlist_mincount': 532,
    }]

    def _real_extract(self, url):
        """Collect all tracks for the search query in ``url`` as a playlist.

        The URL-unquoted query is used both as the search string and as the
        playlist id. Results are fetched page by page (LIMIT tracks per
        request) until a page has no usable ``MusicData`` list or the
        reported total has been covered.
        """
        query = compat_urllib_parse_unquote(self._match_id(url))

        LIMIT = 100
        entries = []

        for page_num in itertools.count():
            offset = page_num * LIMIT
            search = self._search(query, url, query, LIMIT, offset)

            music_data = search.get('MusicData')
            if not music_data or not isinstance(music_data, list):
                break

            for t in music_data:
                # Non-fatal: tracks lacking a URL or File id are skipped.
                track = self._extract_track(t, fatal=False)
                if track:
                    entries.append(track)

            total = try_get(
                search, lambda x: x['Results']['music']['Total'], int)

            # Stop as soon as the next page would start at or beyond the
            # reported total; this avoids one extra request for a page that
            # is already known to be past the end. When the total is not
            # available, keep paging until an empty page is returned.
            if total is not None and offset + LIMIT >= total:
                break

        return self.playlist_result(entries, query)
|   | ||||
		Reference in New Issue
	
	Block a user
	 Sergey M․
					Sergey M․