[ie/xminus] Rework Extractor

2025-08-15 08:58:28 +00:00 · 2025-06-02 03:07:17 +09:00 · 2025-06-02 03:07:17 +09:00 · 725815b81f
commit 725815b81f
parent 943083edcd
1 changed file with 95 additions and 54 deletions
--- a/yt_dlp/extractor/xminus.py
+++ b/yt_dlp/extractor/xminus.py
@@ -1,77 +1,118 @@
 import re
-import time
+import urllib.parse
 from .common import InfoExtractor
 from ..compat import (
    compat_ord,
 )
 from ..utils import (
    clean_html,
    extract_attributes,
    int_or_none,
    merge_dicts,
    parse_bitrate,
    parse_count,
    parse_duration,
    parse_filesize,
    str_or_none,
    unified_strdate,
    update_url_query,
 )
 from ..utils.traversal import (
    find_element,
    find_elements,
    traverse_obj,
    trim_str,
 )
 class XMinusIE(InfoExtractor):
-    _WORKING = False
+    IE_NAME = 'xminus'
-    _VALID_URL = r'https?://(?:www\.)?x-minus\.org/track/(?P<id>[0-9]+)'
+    IE_DESC = 'X-Minus'
-    _TEST = {
+
-        'url': 'http://x-minus.org/track/4542/%D0%BF%D0%B5%D1%81%D0%B5%D0%BD%D0%BA%D0%B0-%D1%88%D0%BE%D1%84%D0%B5%D1%80%D0%B0.html',
+    _VALID_URL = r'https?://x-minus\.pro/track/(?P<id>\d+)/[^/?#]+'
-        'md5': '401a15f2d2dcf6d592cb95528d72a2a8',
+    _TESTS = [{
        'url': 'https://x-minus.pro/track/4542/%D0%BF%D0%B5%D1%81%D0%B5%D0%BD%D0%BA%D0%B0-%D1%88%D0%BE%D1%84%D1%91%D1%80%D0%B0-2',
        'info_dict': {
            'id': '4542',
            'ext': 'mp3',
-            'title': 'Леонид Агутин-Песенка шофёра',
+            'title': 'Песенка шофёра',
-            'duration': 156,
+            'alt_title': 'Instrumental #2',
-            'tbr': 320,
+            'artists': ['Леонид Агутин'],
-            'filesize_approx': 5900000,
+            'description': 'md5:ed26c57333e7e6dc002ff118c5ac419a',
            'duration': 156.0,
            'like_count': int,
            'upload_date': '20120906',
            'view_count': int,
            'description': 'md5:03238c5b663810bc79cf42ef3c03e371',
        },
-    }
+    }, {
        'url': 'https://x-minus.pro/track/389368/%D0%BA%D1%80%D0%B8%D0%BB%D0%B0',
        'info_dict': {
            'id': '389368',
            'ext': 'mp3',
            'title': 'Крила',
            'alt_title': 'Instrumental',
            'artists': ['Jamala'],
            'description': 'md5:c3a0029c81a71fad31d451f42e958768',
            'duration': 263.0,
            'genres': ['arrangement'],
            'like_count': int,
            'tags': ['Pop songs', 'Pop'],
            'upload_date': '20190125',
            'uploader': 'BeKhan',
            'uploader_id': '374800',
            'view_count': int,
        },
    }]
    def _real_extract(self, url):
-        video_id = self._match_id(url)
+        track_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        webpage = self._download_webpage(url, track_id)
-        artist = self._html_search_regex(
+        data_k, prefix = traverse_obj(webpage, ((
-            r'<a[^>]+href="/artist/\d+">([^<]+)</a>', webpage, 'artist')
+            {find_element(id='player-data', html=True)},
-        title = artist + '-' + self._html_search_regex(
+            {find_element(id=f'm{track_id}', html=True)},
-            r'<span[^>]+class="minustrack-full-title(?:\s+[^"]+)?"[^>]*>([^<]+)', webpage, 'title')
+        ), {extract_attributes}, 'data-k', {str}))
-        duration = parse_duration(self._html_search_regex(
+        data_fn = traverse_obj(webpage, (
-            r'<span[^>]+class="player-duration(?:\s+[^"]+)?"[^>]*>([^<]+)',
+            {find_element(id=f'dw-link-m{track_id}')},
-            webpage, 'duration', fatal=False))
+            {find_element(cls='no-ajax', html=True)},
-        mobj = re.search(
+            {extract_attributes}, 'data-fn', {str}))
-            r'<div[^>]+class="dw-info(?:\s+[^"]+)?"[^>]*>(?P<tbr>\d+)\s*кбит/c\s+(?P<filesize>[0-9.]+)\s*мб</div>',
+        s = sum(map(ord, data_k)) + int(track_id) + 1004
-            webpage)
+        c = (int(track_id) - 125_765) // 333
        tbr = filesize_approx = None
        if mobj:
            filesize_approx = float(mobj.group('filesize')) * 1000000
            tbr = float(mobj.group('tbr'))
        view_count = int_or_none(self._html_search_regex(
            r'<span><[^>]+class="icon-chart-bar".*?>(\d+)</span>',
            webpage, 'view count', fatal=False))
        description = self._html_search_regex(
            r'(?s)<pre[^>]+id="lyrics-original"[^>]*>(.*?)</pre>',
            webpage, 'song lyrics', fatal=False)
        if description:
            description = re.sub(' *\r *', '\n', description)
-        k = self._search_regex(
+        file_url = update_url_query(
-            r'<div[^>]+id="player-bottom"[^>]+data-k="([^"]+)">', webpage,
+            f'https://m5.xmst.cc/dl/minus/{track_id}', {
-            'encoded data')
+                't668': f'{s:x}zyxwz{track_id}.9z{prefix}z{c}',
-        h = time.time() / 3600
+            })
-        a = sum(map(int, [compat_ord(c) for c in k])) + int(video_id) + h
+        file_url += f'&trackname={urllib.parse.quote(data_fn, safe="()")}'
-        video_url = 'http://x-minus.me/dl/minus?id=%s&tkn2=%df%d' % (video_id, a, h)
+
        info = traverse_obj(webpage, (
            {find_element(cls='minustrack-info', html=True)},
            {re.compile(r'<tr[^>]*>([\s\S]+?)</tr>').findall}, ...,
            {lambda x: dict([map(str.strip, clean_html(x).split(':', 1))])},
            all, {lambda x: merge_dicts(*x)}))
        filesize, bitrate = re.match(r'(.+)\s+(\d+\s*kbps)', info.get('File Size')).groups()
        date_str = info.get('Uploaded', '').split('@', 1)[-1].strip()
        return {
-            'id': video_id,
+            'id': track_id,
            'title': title,
            'url': video_url,
            # The extension is unknown until actual downloading
            'ext': 'mp3',
-            'duration': duration,
+            'filesize_approx': parse_filesize(filesize),
-            'filesize_approx': filesize_approx,
+            'genre': traverse_obj(info, ('Type', {str_or_none}, filter)),
-            'tbr': tbr,
+            'tbr': parse_bitrate(bitrate),
-            'view_count': view_count,
+            'upload_date': unified_strdate(date_str) if date_str else None,
-            'description': description,
+            'url': file_url,
            'vcodec': 'none',
            **traverse_obj(webpage, {
                'title': ({find_element(cls='list in-tab tracklist', html=True)}, {extract_attributes}, 'data-tit', {clean_html}),
                'alt_title': ({find_element(cls='minustrack-full-title')}, {find_element(cls='hide-mob')}, {clean_html}),
                'artist': ({find_element(cls='card-tit notranslate')}, {find_element(tag='a')}, {clean_html}),
                'description': ({find_element(cls='tab-lyrics notranslate')}, {clean_html}),
                'duration': ({find_element(cls='player-duration')}, {parse_duration}),
                'like_count': ({find_element(cls='button-like-value')}, {int_or_none}),
                'tags': ({find_elements(cls='minustrack-info-tag')}, ..., {clean_html}, filter, all, filter),
                'view_count': ({find_element(attr='data-tooltip', value='Track rating for all time')}, {clean_html}, {parse_count}),
            }),
            **traverse_obj(webpage, ({find_element(cls='minustrack-info-user', html=True)}, {
                'uploader': {clean_html},
                'uploader_id': ({extract_attributes}, 'href', {trim_str(start='/user/')}),
            })),
        }