Merge df676f59cd into f2919bd28e

2025-08-14 00:18:31 +00:00 · 2025-08-12 20:25:36 -03:00 · 2025-08-12 20:25:36 -03:00 · add0d334dd
commit add0d334dd
parent f2919bd28e df676f59cd
1 changed files with 38 additions and 55 deletions
--- a/yt_dlp/extractor/iqiyi.py
+++ b/yt_dlp/extractor/iqiyi.py
@ -1,3 +1,4 @@
+import base64
 import hashlib
 import itertools
 import re
@ -8,12 +9,9 @@
 from .openload import PhantomJSwrapper
 from ..utils import (
    ExtractorError,
-    clean_html,
    decode_packed_codes,
    float_or_none,
    format_field,
-    get_element_by_attribute,
-    get_element_by_id,
    int_or_none,
    js_to_json,
    ohdave_rsa_encrypt,
@ -171,14 +169,6 @@ class IqiyiIE(InfoExtractor):
    _NETRC_MACHINE = 'iqiyi'

    _TESTS = [{
-        'url': 'http://www.iqiyi.com/v_19rrojlavg.html',
-        # MD5 checksum differs on my machine and Travis CI
-        'info_dict': {
-            'id': '9c1fb1b99d192b21c559e5a1a2cb3c73',
-            'ext': 'mp4',
-            'title': '美国德州空中惊现奇异云团 酷似UFO',
-        },
-    }, {
        'url': 'http://www.iqiyi.com/v_19rrhnnclk.html',
        'md5': 'b7dc800a4004b1b57749d9abae0472da',
        'info_dict': {
@ -337,57 +327,50 @@ def _extract_playlist(self, webpage):
        return self.playlist_result(entries, album_id, album_title)

    def _real_extract(self, url):
-        webpage = self._download_webpage(
-            url, 'temp_id', note='download video page')
+        js = self._download_webpage(
+            'https://mesh.if.iqiyi.com/player/lw/lwplay/accelerator.js?apiVer=3',
+            'temp_id_js',
+            headers={'Referer': url},
+        )
+        tvid = self._search_regex(r'"tvid":(\d+)', js, 'tvid')

-        # There's no simple way to determine whether an URL is a playlist or not
-        # Sometimes there are playlist links in individual videos, so treat it
-        # as a single video first
-        tvid = self._search_regex(
-            r'data-(?:player|shareplattrigger)-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid', default=None)
-        if tvid is None:
-            playlist_result = self._extract_playlist(webpage)
-            if playlist_result:
-                return playlist_result
-            raise ExtractorError('Can\'t find any video')
+        params = {
+            'tvid': tvid,
+            'ad_cid': '',
+            'disableDRM': 'false',
+            'cpt': 0,
+            'apiVer': 3,
+            'format': 'json',
+            'timestamp': int(time.time() * 1000),
+        }

-        video_id = self._search_regex(
-            r'data-(?:player|shareplattrigger)-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id')
+        download_info = self._download_json(
+            'https://mesh.if.iqiyi.com/player/lw/lwplay/accelerator.js', 'temp_id_json', query=params)
+        ev = download_info.get('ev')
+        if not ev:
+            raise ExtractorError('No ev data in response')

-        formats = []
-        for _ in range(5):
-            raw_data = self.get_raw_data(tvid, video_id)
+        # Decode
+        try:
+            decoded = ''.join(chr(ord(c) ^ 90) for c in ev)
+            ev_data = self._parse_json(decoded, 'ev json')
+        except Exception as e:
+            raise ExtractorError('Failed to decode ev: ' + str(e))

-            if raw_data['code'] != 'A00000':
-                if raw_data['code'] == 'A00111':
-                    self.raise_geo_restricted()
-                raise ExtractorError('Unable to load data. Error code: ' + raw_data['code'])
+        m3u8_content = traverse_obj(ev_data, ('data', 'program', 'video', 2, 'm3u8'))
+        m3u8_base64 = base64.b64encode(m3u8_content.encode('utf-8')).decode('ascii')
+        m3u8_data_url = f'data:application/vnd.apple.mpegurl;base64,{m3u8_base64}'
+        title = traverse_obj(download_info, ('videoInfo', 'title'))

-            data = raw_data['data']
-
-            for stream in data['vidl']:
-                if 'm3utx' not in stream:
-                    continue
-                vd = str(stream['vd'])
-                formats.append({
-                    'url': stream['m3utx'],
-                    'format_id': vd,
-                    'ext': 'mp4',
-                    'quality': self._FORMATS_MAP.get(vd, -1),
-                    'protocol': 'm3u8_native',
-                })
-
-            if formats:
-                break
-
-            self._sleep(5, video_id)
-
-        title = (get_element_by_id('widget-videotitle', webpage)
-                 or clean_html(get_element_by_attribute('class', 'mod-play-tit', webpage))
-                 or self._html_search_regex(r'<span[^>]+data-videochanged-title="word"[^>]*>([^<]+)</span>', webpage, 'title'))
+        formats = [{
+            'format_id': 'default',
+            'url': m3u8_data_url,
+            'ext': 'mp4',
+            'protocol': 'm3u8_native',
+        }]

        return {
-            'id': video_id,
+            'id': tvid,
            'title': title,
            'formats': formats,
        }