[ie/xminus] Rework Extractor

2025-06-27 17:08:32 +00:00 · 2025-06-02 03:07:17 +09:00 · 2025-06-02 03:07:17 +09:00 · 725815b81f
commit 725815b81f
parent 943083edcd
1 changed files with 95 additions and 54 deletions
--- a/yt_dlp/extractor/xminus.py
+++ b/yt_dlp/extractor/xminus.py
@@ -1,77 +1,118 @@
 import re
-import time
+import urllib.parse

 from .common import InfoExtractor
-from ..compat import (
-    compat_ord,
-)
 from ..utils import (
+    clean_html,
+    extract_attributes,
    int_or_none,
+    merge_dicts,
+    parse_bitrate,
+    parse_count,
    parse_duration,
+    parse_filesize,
+    str_or_none,
+    unified_strdate,
+    update_url_query,
+)
+from ..utils.traversal import (
+    find_element,
+    find_elements,
+    traverse_obj,
+    trim_str,
 )


 class XMinusIE(InfoExtractor):
-    _WORKING = False
-    _VALID_URL = r'https?://(?:www\.)?x-minus\.org/track/(?P<id>[0-9]+)'
-    _TEST = {
-        'url': 'http://x-minus.org/track/4542/%D0%BF%D0%B5%D1%81%D0%B5%D0%BD%D0%BA%D0%B0-%D1%88%D0%BE%D1%84%D0%B5%D1%80%D0%B0.html',
-        'md5': '401a15f2d2dcf6d592cb95528d72a2a8',
+    IE_NAME = 'xminus'
+    IE_DESC = 'X-Minus'
+
+    _VALID_URL = r'https?://x-minus\.pro/track/(?P<id>\d+)/[^/?#]+'
+    _TESTS = [{
+        'url': 'https://x-minus.pro/track/4542/%D0%BF%D0%B5%D1%81%D0%B5%D0%BD%D0%BA%D0%B0-%D1%88%D0%BE%D1%84%D1%91%D1%80%D0%B0-2',
        'info_dict': {
            'id': '4542',
            'ext': 'mp3',
-            'title': 'Леонид Агутин-Песенка шофёра',
-            'duration': 156,
-            'tbr': 320,
-            'filesize_approx': 5900000,
+            'title': 'Песенка шофёра',
+            'alt_title': 'Instrumental #2',
+            'artists': ['Леонид Агутин'],
+            'description': 'md5:ed26c57333e7e6dc002ff118c5ac419a',
+            'duration': 156.0,
+            'like_count': int,
+            'upload_date': '20120906',
            'view_count': int,
-            'description': 'md5:03238c5b663810bc79cf42ef3c03e371',
        },
-    }
+    }, {
+        'url': 'https://x-minus.pro/track/389368/%D0%BA%D1%80%D0%B8%D0%BB%D0%B0',
+        'info_dict': {
+            'id': '389368',
+            'ext': 'mp3',
+            'title': 'Крила',
+            'alt_title': 'Instrumental',
+            'artists': ['Jamala'],
+            'description': 'md5:c3a0029c81a71fad31d451f42e958768',
+            'duration': 263.0,
+            'genres': ['arrangement'],
+            'like_count': int,
+            'tags': ['Pop songs', 'Pop'],
+            'upload_date': '20190125',
+            'uploader': 'BeKhan',
+            'uploader_id': '374800',
+            'view_count': int,
+        },
+    }]

    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        track_id = self._match_id(url)
+        webpage = self._download_webpage(url, track_id)

-        artist = self._html_search_regex(
-            r'<a[^>]+href="/artist/\d+">([^<]+)</a>', webpage, 'artist')
-        title = artist + '-' + self._html_search_regex(
-            r'<span[^>]+class="minustrack-full-title(?:\s+[^"]+)?"[^>]*>([^<]+)', webpage, 'title')
-        duration = parse_duration(self._html_search_regex(
-            r'<span[^>]+class="player-duration(?:\s+[^"]+)?"[^>]*>([^<]+)',
-            webpage, 'duration', fatal=False))
-        mobj = re.search(
-            r'<div[^>]+class="dw-info(?:\s+[^"]+)?"[^>]*>(?P<tbr>\d+)\s*кбит/c\s+(?P<filesize>[0-9.]+)\s*мб</div>',
-            webpage)
-        tbr = filesize_approx = None
-        if mobj:
-            filesize_approx = float(mobj.group('filesize')) * 1000000
-            tbr = float(mobj.group('tbr'))
-        view_count = int_or_none(self._html_search_regex(
-            r'<span><[^>]+class="icon-chart-bar".*?>(\d+)</span>',
-            webpage, 'view count', fatal=False))
-        description = self._html_search_regex(
-            r'(?s)<pre[^>]+id="lyrics-original"[^>]*>(.*?)</pre>',
-            webpage, 'song lyrics', fatal=False)
-        if description:
-            description = re.sub(' *\r *', '\n', description)
+        data_k, prefix = traverse_obj(webpage, ((
+            {find_element(id='player-data', html=True)},
+            {find_element(id=f'm{track_id}', html=True)},
+        ), {extract_attributes}, 'data-k', {str}))
+        data_fn = traverse_obj(webpage, (
+            {find_element(id=f'dw-link-m{track_id}')},
+            {find_element(cls='no-ajax', html=True)},
+            {extract_attributes}, 'data-fn', {str}))
+        s = sum(map(ord, data_k)) + int(track_id) + 1004
+        c = (int(track_id) - 125_765) // 333

-        k = self._search_regex(
-            r'<div[^>]+id="player-bottom"[^>]+data-k="([^"]+)">', webpage,
-            'encoded data')
-        h = time.time() / 3600
-        a = sum(map(int, [compat_ord(c) for c in k])) + int(video_id) + h
-        video_url = 'http://x-minus.me/dl/minus?id=%s&tkn2=%df%d' % (video_id, a, h)
+        file_url = update_url_query(
+            f'https://m5.xmst.cc/dl/minus/{track_id}', {
+                't668': f'{s:x}zyxwz{track_id}.9z{prefix}z{c}',
+            })
+        file_url += f'&trackname={urllib.parse.quote(data_fn, safe="()")}'
+
+        info = traverse_obj(webpage, (
+            {find_element(cls='minustrack-info', html=True)},
+            {re.compile(r'<tr[^>]*>([\s\S]+?)</tr>').findall}, ...,
+            {lambda x: dict([map(str.strip, clean_html(x).split(':', 1))])},
+            all, {lambda x: merge_dicts(*x)}))
+
+        filesize, bitrate = re.match(r'(.+)\s+(\d+\s*kbps)', info.get('File Size')).groups()
+        date_str = info.get('Uploaded', '').split('@', 1)[-1].strip()

        return {
-            'id': video_id,
-            'title': title,
-            'url': video_url,
-            # The extension is unknown until actual downloading
+            'id': track_id,
            'ext': 'mp3',
-            'duration': duration,
-            'filesize_approx': filesize_approx,
-            'tbr': tbr,
-            'view_count': view_count,
-            'description': description,
+            'filesize_approx': parse_filesize(filesize),
+            'genre': traverse_obj(info, ('Type', {str_or_none}, filter)),
+            'tbr': parse_bitrate(bitrate),
+            'upload_date': unified_strdate(date_str) if date_str else None,
+            'url': file_url,
+            'vcodec': 'none',
+            **traverse_obj(webpage, {
+                'title': ({find_element(cls='list in-tab tracklist', html=True)}, {extract_attributes}, 'data-tit', {clean_html}),
+                'alt_title': ({find_element(cls='minustrack-full-title')}, {find_element(cls='hide-mob')}, {clean_html}),
+                'artist': ({find_element(cls='card-tit notranslate')}, {find_element(tag='a')}, {clean_html}),
+                'description': ({find_element(cls='tab-lyrics notranslate')}, {clean_html}),
+                'duration': ({find_element(cls='player-duration')}, {parse_duration}),
+                'like_count': ({find_element(cls='button-like-value')}, {int_or_none}),
+                'tags': ({find_elements(cls='minustrack-info-tag')}, ..., {clean_html}, filter, all, filter),
+                'view_count': ({find_element(attr='data-tooltip', value='Track rating for all time')}, {clean_html}, {parse_count}),
+            }),
+            **traverse_obj(webpage, ({find_element(cls='minustrack-info-user', html=True)}, {
+                'uploader': {clean_html},
+                'uploader_id': ({extract_attributes}, 'href', {trim_str(start='/user/')}),
+            })),
        }