mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-06-28 09:28:33 +00:00
[ie/xminus] Rework Extractor
This commit is contained in:
parent
943083edcd
commit
725815b81f
@ -1,77 +1,118 @@
|
|||||||
import re
|
import re
|
||||||
import time
|
import urllib.parse
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
|
||||||
compat_ord,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
extract_attributes,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
merge_dicts,
|
||||||
|
parse_bitrate,
|
||||||
|
parse_count,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
|
parse_filesize,
|
||||||
|
str_or_none,
|
||||||
|
unified_strdate,
|
||||||
|
update_url_query,
|
||||||
|
)
|
||||||
|
from ..utils.traversal import (
|
||||||
|
find_element,
|
||||||
|
find_elements,
|
||||||
|
traverse_obj,
|
||||||
|
trim_str,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class XMinusIE(InfoExtractor):
|
class XMinusIE(InfoExtractor):
|
||||||
_WORKING = False
|
IE_NAME = 'xminus'
|
||||||
_VALID_URL = r'https?://(?:www\.)?x-minus\.org/track/(?P<id>[0-9]+)'
|
IE_DESC = 'X-Minus'
|
||||||
_TEST = {
|
|
||||||
'url': 'http://x-minus.org/track/4542/%D0%BF%D0%B5%D1%81%D0%B5%D0%BD%D0%BA%D0%B0-%D1%88%D0%BE%D1%84%D0%B5%D1%80%D0%B0.html',
|
_VALID_URL = r'https?://x-minus\.pro/track/(?P<id>\d+)/[^/?#]+'
|
||||||
'md5': '401a15f2d2dcf6d592cb95528d72a2a8',
|
_TESTS = [{
|
||||||
|
'url': 'https://x-minus.pro/track/4542/%D0%BF%D0%B5%D1%81%D0%B5%D0%BD%D0%BA%D0%B0-%D1%88%D0%BE%D1%84%D1%91%D1%80%D0%B0-2',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '4542',
|
'id': '4542',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Леонид Агутин-Песенка шофёра',
|
'title': 'Песенка шофёра',
|
||||||
'duration': 156,
|
'alt_title': 'Instrumental #2',
|
||||||
'tbr': 320,
|
'artists': ['Леонид Агутин'],
|
||||||
'filesize_approx': 5900000,
|
'description': 'md5:ed26c57333e7e6dc002ff118c5ac419a',
|
||||||
|
'duration': 156.0,
|
||||||
|
'like_count': int,
|
||||||
|
'upload_date': '20120906',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'description': 'md5:03238c5b663810bc79cf42ef3c03e371',
|
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://x-minus.pro/track/389368/%D0%BA%D1%80%D0%B8%D0%BB%D0%B0',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '389368',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'Крила',
|
||||||
|
'alt_title': 'Instrumental',
|
||||||
|
'artists': ['Jamala'],
|
||||||
|
'description': 'md5:c3a0029c81a71fad31d451f42e958768',
|
||||||
|
'duration': 263.0,
|
||||||
|
'genres': ['arrangement'],
|
||||||
|
'like_count': int,
|
||||||
|
'tags': ['Pop songs', 'Pop'],
|
||||||
|
'upload_date': '20190125',
|
||||||
|
'uploader': 'BeKhan',
|
||||||
|
'uploader_id': '374800',
|
||||||
|
'view_count': int,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
track_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, track_id)
|
||||||
|
|
||||||
artist = self._html_search_regex(
|
data_k, prefix = traverse_obj(webpage, ((
|
||||||
r'<a[^>]+href="/artist/\d+">([^<]+)</a>', webpage, 'artist')
|
{find_element(id='player-data', html=True)},
|
||||||
title = artist + '-' + self._html_search_regex(
|
{find_element(id=f'm{track_id}', html=True)},
|
||||||
r'<span[^>]+class="minustrack-full-title(?:\s+[^"]+)?"[^>]*>([^<]+)', webpage, 'title')
|
), {extract_attributes}, 'data-k', {str}))
|
||||||
duration = parse_duration(self._html_search_regex(
|
data_fn = traverse_obj(webpage, (
|
||||||
r'<span[^>]+class="player-duration(?:\s+[^"]+)?"[^>]*>([^<]+)',
|
{find_element(id=f'dw-link-m{track_id}')},
|
||||||
webpage, 'duration', fatal=False))
|
{find_element(cls='no-ajax', html=True)},
|
||||||
mobj = re.search(
|
{extract_attributes}, 'data-fn', {str}))
|
||||||
r'<div[^>]+class="dw-info(?:\s+[^"]+)?"[^>]*>(?P<tbr>\d+)\s*кбит/c\s+(?P<filesize>[0-9.]+)\s*мб</div>',
|
s = sum(map(ord, data_k)) + int(track_id) + 1004
|
||||||
webpage)
|
c = (int(track_id) - 125_765) // 333
|
||||||
tbr = filesize_approx = None
|
|
||||||
if mobj:
|
|
||||||
filesize_approx = float(mobj.group('filesize')) * 1000000
|
|
||||||
tbr = float(mobj.group('tbr'))
|
|
||||||
view_count = int_or_none(self._html_search_regex(
|
|
||||||
r'<span><[^>]+class="icon-chart-bar".*?>(\d+)</span>',
|
|
||||||
webpage, 'view count', fatal=False))
|
|
||||||
description = self._html_search_regex(
|
|
||||||
r'(?s)<pre[^>]+id="lyrics-original"[^>]*>(.*?)</pre>',
|
|
||||||
webpage, 'song lyrics', fatal=False)
|
|
||||||
if description:
|
|
||||||
description = re.sub(' *\r *', '\n', description)
|
|
||||||
|
|
||||||
k = self._search_regex(
|
file_url = update_url_query(
|
||||||
r'<div[^>]+id="player-bottom"[^>]+data-k="([^"]+)">', webpage,
|
f'https://m5.xmst.cc/dl/minus/{track_id}', {
|
||||||
'encoded data')
|
't668': f'{s:x}zyxwz{track_id}.9z{prefix}z{c}',
|
||||||
h = time.time() / 3600
|
})
|
||||||
a = sum(map(int, [compat_ord(c) for c in k])) + int(video_id) + h
|
file_url += f'&trackname={urllib.parse.quote(data_fn, safe="()")}'
|
||||||
video_url = 'http://x-minus.me/dl/minus?id=%s&tkn2=%df%d' % (video_id, a, h)
|
|
||||||
|
info = traverse_obj(webpage, (
|
||||||
|
{find_element(cls='minustrack-info', html=True)},
|
||||||
|
{re.compile(r'<tr[^>]*>([\s\S]+?)</tr>').findall}, ...,
|
||||||
|
{lambda x: dict([map(str.strip, clean_html(x).split(':', 1))])},
|
||||||
|
all, {lambda x: merge_dicts(*x)}))
|
||||||
|
|
||||||
|
filesize, bitrate = re.match(r'(.+)\s+(\d+\s*kbps)', info.get('File Size')).groups()
|
||||||
|
date_str = info.get('Uploaded', '').split('@', 1)[-1].strip()
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': track_id,
|
||||||
'title': title,
|
|
||||||
'url': video_url,
|
|
||||||
# The extension is unknown until actual downloading
|
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'duration': duration,
|
'filesize_approx': parse_filesize(filesize),
|
||||||
'filesize_approx': filesize_approx,
|
'genre': traverse_obj(info, ('Type', {str_or_none}, filter)),
|
||||||
'tbr': tbr,
|
'tbr': parse_bitrate(bitrate),
|
||||||
'view_count': view_count,
|
'upload_date': unified_strdate(date_str) if date_str else None,
|
||||||
'description': description,
|
'url': file_url,
|
||||||
|
'vcodec': 'none',
|
||||||
|
**traverse_obj(webpage, {
|
||||||
|
'title': ({find_element(cls='list in-tab tracklist', html=True)}, {extract_attributes}, 'data-tit', {clean_html}),
|
||||||
|
'alt_title': ({find_element(cls='minustrack-full-title')}, {find_element(cls='hide-mob')}, {clean_html}),
|
||||||
|
'artist': ({find_element(cls='card-tit notranslate')}, {find_element(tag='a')}, {clean_html}),
|
||||||
|
'description': ({find_element(cls='tab-lyrics notranslate')}, {clean_html}),
|
||||||
|
'duration': ({find_element(cls='player-duration')}, {parse_duration}),
|
||||||
|
'like_count': ({find_element(cls='button-like-value')}, {int_or_none}),
|
||||||
|
'tags': ({find_elements(cls='minustrack-info-tag')}, ..., {clean_html}, filter, all, filter),
|
||||||
|
'view_count': ({find_element(attr='data-tooltip', value='Track rating for all time')}, {clean_html}, {parse_count}),
|
||||||
|
}),
|
||||||
|
**traverse_obj(webpage, ({find_element(cls='minustrack-info-user', html=True)}, {
|
||||||
|
'uploader': {clean_html},
|
||||||
|
'uploader_id': ({extract_attributes}, 'href', {trim_str(start='/user/')}),
|
||||||
|
})),
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user