From c70b57c03e0c25767a5166620798297a2a4878fb Mon Sep 17 00:00:00 2001
From: sepro
Date: Sat, 6 Dec 2025 22:24:03 +0100
Subject: [PATCH] [ie/Alibaba] Add extractor (#15253)

Closes #13774
Authored by: seproDev
---
Review notes (free text in this section is not part of the commit):
 * _extractors.py: imports AlibabaIE between the aitube and aliexpress
   entries.
 * alibaba.py: _VALID_URL accepts
   alibaba.com/product-detail/<slug>_<digits>.html; the digits are the named
   group `id`, which _real_extract obtains through self._match_id(url) and
   reports as display_id.
 * _real_extract parses the JSON assigned to window.detailData in the page
   and indexes ['globalData']['product'] with plain subscripts (no fallback).
 * Media fields (id, duration, thumbnail, formats) are taken from the first
   mediaItems entry whose type is 'video' and whose videoId is truthy; only
   videoUrl entries with a valid URL become formats. The title is the
   product's `subject`.
 * NOTE(review): the From: header carries no e-mail address - presumably it
   was lost together with other angle-bracketed text; confirm and restore it
   before feeding this file to `git am`.

 yt_dlp/extractor/_extractors.py |  1 +
 yt_dlp/extractor/alibaba.py     | 42 +++++++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+)
 create mode 100644 yt_dlp/extractor/alibaba.py

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 5a71096c96..9129b5e68f 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -84,6 +84,7 @@ from .agora import (
 )
 from .airtv import AirTVIE
 from .aitube import AitubeKZVideoIE
+from .alibaba import AlibabaIE
 from .aliexpress import AliExpressLiveIE
 from .aljazeera import AlJazeeraIE
 from .allocine import AllocineIE
diff --git a/yt_dlp/extractor/alibaba.py b/yt_dlp/extractor/alibaba.py
new file mode 100644
index 0000000000..0912535266
--- /dev/null
+++ b/yt_dlp/extractor/alibaba.py
@@ -0,0 +1,42 @@
+from .common import InfoExtractor
+from ..utils import int_or_none, str_or_none, url_or_none
+from ..utils.traversal import traverse_obj
+
+
+class AlibabaIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?alibaba\.com/product-detail/[\w-]+_(?P<id>\d+)\.html'
+    _TESTS = [{
+        'url': 'https://www.alibaba.com/product-detail/Kids-Entertainment-Bouncer-Bouncy-Castle-Waterslide_1601271126969.html',
+        'info_dict': {
+            'id': '6000280444270',
+            'display_id': '1601271126969',
+            'ext': 'mp4',
+            'title': 'Kids Entertainment Bouncer Bouncy Castle Waterslide Juex Gonflables Commercial Inflatable Tropical Water Slide',
+            'duration': 30,
+            'thumbnail': 'https://sc04.alicdn.com/kf/Hc5bb391974454af18c7a4f91cbe4062bg.jpg_120x120.jpg',
+        },
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        product_data = self._search_json(
+            r'window\.detailData\s*=', webpage, 'detail data', display_id)['globalData']['product']
+
+        return {
+            **traverse_obj(product_data, ('mediaItems', lambda _, v: v['type'] == 'video' and v['videoId'], any, {
+                'id': ('videoId', {int}, {str_or_none}),
+                'duration': ('duration', {int_or_none}),
+                'thumbnail': ('videoCoverUrl', {url_or_none}),
+                'formats': ('videoUrl', lambda _, v: url_or_none(v['videoUrl']), {
+                    'url': 'videoUrl',
+                    'format_id': ('definition', {str_or_none}),
+                    'tbr': ('bitrate', {int_or_none}),
+                    'width': ('width', {int_or_none}),
+                    'height': ('height', {int_or_none}),
+                    'filesize': ('length', {int_or_none}),
+                }),
+            })),
+            'title': traverse_obj(product_data, ('subject', {str})),
+            'display_id': display_id,
+        }