import re
import urllib.parse

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    clean_html,
    extract_attributes,
    int_or_none,
    merge_dicts,
    parse_bitrate,
    parse_count,
    parse_duration,
    parse_filesize,
    str_or_none,
    unified_strdate,
    update_url_query,
)
from ..utils.traversal import (
    find_element,
    find_elements,
    traverse_obj,
    trim_str,
)


class XMinusIE(InfoExtractor):
    """Extractor for single backing-track pages on x-minus.pro."""

    IE_NAME = 'xminus'
    IE_DESC = 'X-Minus'

    # The named group ``id`` is what ``_match_id`` reads the track id from.
    _VALID_URL = r'https?://x-minus\.pro/track/(?P<id>\d+)/[^/?#]+'
    _TESTS = [{
        'url': 'https://x-minus.pro/track/4542/%D0%BF%D0%B5%D1%81%D0%B5%D0%BD%D0%BA%D0%B0-%D1%88%D0%BE%D1%84%D1%91%D1%80%D0%B0-2',
        'info_dict': {
            'id': '4542',
            'ext': 'mp3',
            'title': 'Песенка шофёра',
            'alt_title': 'Instrumental #2',
            'artists': ['Леонид Агутин'],
            'description': 'md5:ed26c57333e7e6dc002ff118c5ac419a',
            'duration': 156.0,
            'like_count': int,
            'upload_date': '20120906',
            'view_count': int,
        },
    }, {
        'url': 'https://x-minus.pro/track/389368/%D0%BA%D1%80%D0%B8%D0%BB%D0%B0',
        'info_dict': {
            'id': '389368',
            'ext': 'mp3',
            'title': 'Крила',
            'alt_title': 'Instrumental',
            'artists': ['Jamala'],
            'description': 'md5:c3a0029c81a71fad31d451f42e958768',
            'duration': 263.0,
            'genres': ['arrangement'],
            'like_count': int,
            'tags': ['Pop songs', 'Pop'],
            'upload_date': '20190125',
            'uploader': 'BeKhan',
            'uploader_id': '374800',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for one track page.

        Downloads the track page, derives the download URL from the
        ``data-k`` attributes of the player elements, and scrapes the
        remaining metadata from the page markup.

        Raises:
            ExtractorError: if the two ``data-k`` values needed for the
                download token are not both present in the page.
        """
        track_id = self._match_id(url)
        webpage = self._download_webpage(url, track_id)

        # Branching path: one ``data-k`` from the ``player-data`` element and
        # one from the ``m<track_id>`` element, in that order. A missing
        # element is silently dropped from the result, so the length must be
        # checked before unpacking.
        player_keys = traverse_obj(webpage, ((
            {find_element(id='player-data', html=True)},
            {find_element(id=f'm{track_id}', html=True)},
        ), {extract_attributes}, 'data-k', {str})) or []
        if len(player_keys) != 2:
            raise ExtractorError('Unable to extract player data')
        data_k, prefix = player_keys

        data_fn = traverse_obj(webpage, (
            {find_element(id=f'dw-link-m{track_id}')},
            {find_element(cls='no-ajax', html=True)},
            {extract_attributes}, 'data-fn', {str}))

        # NOTE(review): the constants below (1004, 125_765, 333, the
        # ``zyxwz``/``.9z`` separators and the ``t668`` parameter name)
        # presumably mirror the site's player script - confirm against it.
        s = sum(map(ord, data_k)) + int(track_id) + 1004
        c = (int(track_id) - 125_765) // 333

        file_url = update_url_query(
            f'https://m5.xmst.cc/dl/minus/{track_id}', {
                't668': f'{s:x}zyxwz{track_id}.9z{prefix}z{c}',
            })
        # Appended by hand so that "(" and ")" stay unescaped.
        # NOTE(review): ``trackname`` is assumed to be optional for the
        # server; previously a missing ``data-fn`` crashed with a TypeError.
        if data_fn:
            file_url += f'&trackname={urllib.parse.quote(data_fn, safe="()")}'

        # Each row of the info box is "<label>: <value>"; rows without a colon
        # make the lambda raise ValueError, which traverse_obj swallows.
        # NOTE(review): the tag name in this pattern was lost in the source
        # this was recovered from; ``div`` is an assumption - confirm against
        # the live markup.
        info = traverse_obj(webpage, (
            {find_element(cls='minustrack-info', html=True)},
            {re.compile(r'<div[^>]*>([\s\S]+?)</div>').findall}, ...,
            {lambda x: dict([map(str.strip, clean_html(x).split(':', 1))])},
            all, {lambda x: merge_dicts(*x)})) or {}

        # "File Size" is expected to look like "<size> <N> kbps"; leave both
        # values unset instead of crashing when it is missing or malformed.
        filesize = bitrate = None
        size_mobj = re.match(r'(.+)\s+(\d+\s*kbps)', info.get('File Size') or '')
        if size_mobj:
            filesize, bitrate = size_mobj.groups()
        # Only the part after the last "@" (if any) is treated as the date.
        date_str = info.get('Uploaded', '').split('@', 1)[-1].strip()

        return {
            'id': track_id,
            'ext': 'mp3',
            'filesize_approx': parse_filesize(filesize),
            # List-valued ``genres``/``artists`` replace the deprecated
            # singular ``genre``/``artist`` fields.
            'genres': traverse_obj(info, ('Type', {str_or_none}, filter, all, filter)),
            'tbr': parse_bitrate(bitrate),
            'upload_date': unified_strdate(date_str) if date_str else None,
            'url': file_url,
            'vcodec': 'none',
            **traverse_obj(webpage, {
                'title': ({find_element(cls='list in-tab tracklist', html=True)}, {extract_attributes}, 'data-tit', {clean_html}),
                'alt_title': ({find_element(cls='minustrack-full-title')}, {find_element(cls='hide-mob')}, {clean_html}),
                'artists': ({find_element(cls='card-tit notranslate')}, {find_element(tag='a')}, {clean_html}, filter, all, filter),
                'description': ({find_element(cls='tab-lyrics notranslate')}, {clean_html}),
                'duration': ({find_element(cls='player-duration')}, {parse_duration}),
                'like_count': ({find_element(cls='button-like-value')}, {int_or_none}),
                'tags': ({find_elements(cls='minustrack-info-tag')}, ..., {clean_html}, filter, all, filter),
                'view_count': ({find_element(attr='data-tooltip', value='Track rating for all time')}, {clean_html}, {parse_count}),
            }),
            **traverse_obj(webpage, ({find_element(cls='minustrack-info-user', html=True)}, {
                'uploader': {clean_html},
                'uploader_id': ({extract_attributes}, 'href', {trim_str(start='/user/')}),
            })),
        }