Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2025-10-30 22:25:19 +00:00)
			
		
		
		
	Update to ytdl-commit-de39d128
[extractor/ceskatelevize] Back-port extractor from yt-dlp
de39d1281c
Closes #5361, Closes #4634, Closes #5210
			
			
This commit is contained in:
		| @@ -11,7 +11,6 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
| import base64 | ||||
| 
 | ||||
| from yt_dlp.aes import ( | ||||
|     BLOCK_SIZE_BYTES, | ||||
|     aes_cbc_decrypt, | ||||
|     aes_cbc_decrypt_bytes, | ||||
|     aes_cbc_encrypt, | ||||
| @@ -103,8 +102,7 @@ class TestAES(unittest.TestCase): | ||||
| 
 | ||||
|     def test_ecb_encrypt(self): | ||||
|         data = bytes_to_intlist(self.secret_msg) | ||||
|         data += [0x08] * (BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES) | ||||
|         encrypted = intlist_to_bytes(aes_ecb_encrypt(data, self.key, self.iv)) | ||||
|         encrypted = intlist_to_bytes(aes_ecb_encrypt(data, self.key)) | ||||
|         self.assertEqual( | ||||
|             encrypted, | ||||
|             b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:') | ||||
|   | ||||
| @@ -28,11 +28,23 @@ def aes_cbc_encrypt_bytes(data, key, iv, **kwargs): | ||||
|     return intlist_to_bytes(aes_cbc_encrypt(*map(bytes_to_intlist, (data, key, iv)), **kwargs)) | ||||
| 
 | ||||
| 
 | ||||
| BLOCK_SIZE_BYTES = 16 | ||||
| 
 | ||||
| 
 | ||||
| def unpad_pkcs7(data): | ||||
|     return data[:-compat_ord(data[-1])] | ||||
| 
 | ||||
| 
 | ||||
| BLOCK_SIZE_BYTES = 16 | ||||
| def pkcs7_padding(data): | ||||
|     """ | ||||
|     PKCS#7 padding | ||||
| 
 | ||||
|     @param {int[]} data        cleartext | ||||
|     @returns {int[]}           padding data | ||||
|     """ | ||||
| 
 | ||||
|     remaining_length = BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES | ||||
|     return data + [remaining_length] * remaining_length | ||||
| 
 | ||||
| 
 | ||||
| def pad_block(block, padding_mode): | ||||
| @@ -64,7 +76,7 @@ def pad_block(block, padding_mode): | ||||
| 
 | ||||
| def aes_ecb_encrypt(data, key, iv=None): | ||||
|     """ | ||||
|     Encrypt with aes in ECB mode | ||||
|     Encrypt with aes in ECB mode. Using PKCS#7 padding | ||||
| 
 | ||||
|     @param {int[]} data        cleartext | ||||
|     @param {int[]} key         16/24/32-Byte cipher key | ||||
| @@ -77,8 +89,7 @@ def aes_ecb_encrypt(data, key, iv=None): | ||||
|     encrypted_data = [] | ||||
|     for i in range(block_count): | ||||
|         block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] | ||||
|         encrypted_data += aes_encrypt(block, expanded_key) | ||||
|     encrypted_data = encrypted_data[:len(data)] | ||||
|         encrypted_data += aes_encrypt(pkcs7_padding(block), expanded_key) | ||||
| 
 | ||||
|     return encrypted_data | ||||
| 
 | ||||
| @@ -551,5 +562,6 @@ __all__ = [ | ||||
| 
 | ||||
|     'key_expansion', | ||||
|     'pad_block', | ||||
|     'pkcs7_padding', | ||||
|     'unpad_pkcs7', | ||||
| ] | ||||
|   | ||||
| @@ -48,6 +48,7 @@ def compat_setenv(key, value, env=os.environ): | ||||
| 
 | ||||
| 
 | ||||
| compat_basestring = str | ||||
| compat_casefold = str.casefold | ||||
| compat_chr = chr | ||||
| compat_collections_abc = collections.abc | ||||
| compat_cookiejar = http.cookiejar | ||||
|   | ||||
| @@ -28,30 +28,34 @@ from ..utils import ( | ||||
| 
 | ||||
| 
 | ||||
| class ADNIE(InfoExtractor): | ||||
|     IE_DESC = 'Anime Digital Network' | ||||
|     _VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites', | ||||
|         'md5': '0319c99885ff5547565cacb4f3f9348d', | ||||
|     IE_DESC = 'Animation Digital Network' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.fr/video/[^/]+/(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir', | ||||
|         'md5': '1c9ef066ceb302c86f80c2b371615261', | ||||
|         'info_dict': { | ||||
|             'id': '7778', | ||||
|             'id': '9841', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Blue Exorcist - Kyôto Saga - Episode 1', | ||||
|             'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5', | ||||
|             'series': 'Blue Exorcist - Kyôto Saga', | ||||
|             'duration': 1467, | ||||
|             'release_date': '20170106', | ||||
|             'title': 'Fruits Basket - Episode 1', | ||||
|             'description': 'md5:14be2f72c3c96809b0ca424b0097d336', | ||||
|             'series': 'Fruits Basket', | ||||
|             'duration': 1437, | ||||
|             'release_date': '20190405', | ||||
|             'comment_count': int, | ||||
|             'average_rating': float, | ||||
|             'season_number': 2, | ||||
|             'episode': 'Début des hostilités', | ||||
|             'season_number': 1, | ||||
|             'episode': 'À ce soir !', | ||||
|             'episode_number': 1, | ||||
|         } | ||||
|     } | ||||
|         }, | ||||
|         'skip': 'Only available in region (FR, ...)', | ||||
|     }, { | ||||
|         'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
| 
 | ||||
|     _NETRC_MACHINE = 'animedigitalnetwork' | ||||
|     _BASE_URL = 'http://animedigitalnetwork.fr' | ||||
|     _API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/' | ||||
|     _NETRC_MACHINE = 'animationdigitalnetwork' | ||||
|     _BASE = 'animationdigitalnetwork.fr' | ||||
|     _API_BASE_URL = 'https://gw.api.' + _BASE + '/' | ||||
|     _PLAYER_BASE_URL = _API_BASE_URL + 'player/' | ||||
|     _HEADERS = {} | ||||
|     _LOGIN_ERR_MESSAGE = 'Unable to log in' | ||||
| @@ -75,11 +79,11 @@ class ADNIE(InfoExtractor): | ||||
|         if subtitle_location: | ||||
|             enc_subtitles = self._download_webpage( | ||||
|                 subtitle_location, video_id, 'Downloading subtitles data', | ||||
|                 fatal=False, headers={'Origin': 'https://animedigitalnetwork.fr'}) | ||||
|                 fatal=False, headers={'Origin': 'https://' + self._BASE}) | ||||
|         if not enc_subtitles: | ||||
|             return None | ||||
| 
 | ||||
|         # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js | ||||
|         # http://animationdigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js | ||||
|         dec_subtitles = unpad_pkcs7(aes_cbc_decrypt_bytes( | ||||
|             compat_b64decode(enc_subtitles[24:]), | ||||
|             binascii.unhexlify(self._K + '7fac1178830cfe0c'), | ||||
|   | ||||
| @@ -9,6 +9,7 @@ from ..utils import ( | ||||
|     ExtractorError, | ||||
|     float_or_none, | ||||
|     sanitized_Request, | ||||
|     str_or_none, | ||||
|     traverse_obj, | ||||
|     urlencode_postdata, | ||||
|     USER_AGENTS, | ||||
| @@ -16,13 +17,13 @@ from ..utils import ( | ||||
| 
 | ||||
| 
 | ||||
| class CeskaTelevizeIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en', | ||||
|         'info_dict': { | ||||
|             'id': '61924494877028507', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Hyde Park Civilizace: Bonus 01 - En', | ||||
|             'title': 'Bonus 01 - En - Hyde Park Civilizace', | ||||
|             'description': 'English Subtittles', | ||||
|             'thumbnail': r're:^https?://.*\.jpg', | ||||
|             'duration': 81.3, | ||||
| @@ -33,18 +34,29 @@ class CeskaTelevizeIE(InfoExtractor): | ||||
|         }, | ||||
|     }, { | ||||
|         # live stream | ||||
|         'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/', | ||||
|         'url': 'http://www.ceskatelevize.cz/zive/ct1/', | ||||
|         'info_dict': { | ||||
|             'id': 402, | ||||
|             'id': '102', | ||||
|             'ext': 'mp4', | ||||
|             'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', | ||||
|             'title': r'ČT1 - živé vysílání online', | ||||
|             'description': 'Sledujte živé vysílání kanálu ČT1 online. Vybírat si můžete i z dalších kanálů České televize na kterémkoli z vašich zařízení.', | ||||
|             'is_live': True, | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'skip': 'Georestricted to Czech Republic', | ||||
|     }, { | ||||
|         # another | ||||
|         'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/', | ||||
|         'only_matching': True, | ||||
|         'info_dict': { | ||||
|             'id': 402, | ||||
|             'ext': 'mp4', | ||||
|             'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', | ||||
|             'is_live': True, | ||||
|         }, | ||||
|         # 'skip': 'Georestricted to Czech Republic', | ||||
|     }, { | ||||
|         'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25', | ||||
|         'only_matching': True, | ||||
| @@ -53,21 +65,21 @@ class CeskaTelevizeIE(InfoExtractor): | ||||
|         'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/', | ||||
|         'info_dict': { | ||||
|             'id': '215562210900007-bogotart', | ||||
|             'title': 'Queer: Bogotart', | ||||
|             'description': 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti. Připravil Peter Serge Butko', | ||||
|             'title': 'Bogotart - Queer', | ||||
|             'description': 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti', | ||||
|         }, | ||||
|         'playlist': [{ | ||||
|             'info_dict': { | ||||
|                 'id': '61924494877311053', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Queer: Bogotart (Varování 18+)', | ||||
|                 'title': 'Bogotart - Queer (Varování 18+)', | ||||
|                 'duration': 11.9, | ||||
|             }, | ||||
|         }, { | ||||
|             'info_dict': { | ||||
|                 'id': '61924494877068022', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Queer: Bogotart (Queer)', | ||||
|                 'title': 'Bogotart - Queer (Queer)', | ||||
|                 'thumbnail': r're:^https?://.*\.jpg', | ||||
|                 'duration': 1558.3, | ||||
|             }, | ||||
| @@ -84,28 +96,42 @@ class CeskaTelevizeIE(InfoExtractor): | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         playlist_id = self._match_id(url) | ||||
|         parsed_url = compat_urllib_parse_urlparse(url) | ||||
|         webpage = self._download_webpage(url, playlist_id) | ||||
|         site_name = self._og_search_property('site_name', webpage, fatal=False, default=None) | ||||
|         webpage, urlh = self._download_webpage_handle(url, playlist_id) | ||||
|         parsed_url = compat_urllib_parse_urlparse(urlh.geturl()) | ||||
|         site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize') | ||||
|         playlist_title = self._og_search_title(webpage, default=None) | ||||
|         if site_name and playlist_title: | ||||
|             playlist_title = playlist_title.replace(f' — {site_name}', '', 1) | ||||
|             playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, 1)[0] | ||||
|         playlist_description = self._og_search_description(webpage, default=None) | ||||
|         if playlist_description: | ||||
|             playlist_description = playlist_description.replace('\xa0', ' ') | ||||
| 
 | ||||
|         if parsed_url.path.startswith('/porady/'): | ||||
|         type_ = 'IDEC' | ||||
|         if re.search(r'(^/porady|/zive)/', parsed_url.path): | ||||
|             next_data = self._search_nextjs_data(webpage, playlist_id) | ||||
|             idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False) | ||||
|             if '/zive/' in parsed_url.path: | ||||
|                 idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'liveBroadcast', 'current', 'idec'), get_all=False) | ||||
|             else: | ||||
|                 idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False) | ||||
|                 if not idec: | ||||
|                     idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'videobonusDetail', 'bonusId'), get_all=False) | ||||
|                     if idec: | ||||
|                         type_ = 'bonus' | ||||
|             if not idec: | ||||
|                 raise ExtractorError('Failed to find IDEC id') | ||||
|             iframe_hash = self._download_webpage('https://www.ceskatelevize.cz/v-api/iframe-hash/', playlist_id) | ||||
|             webpage = self._download_webpage('https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php', playlist_id, | ||||
|                                              query={'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', 'IDEC': idec}) | ||||
|             iframe_hash = self._download_webpage( | ||||
|                 'https://www.ceskatelevize.cz/v-api/iframe-hash/', | ||||
|                 playlist_id, note='Getting IFRAME hash') | ||||
|             query = {'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', type_: idec, } | ||||
|             webpage = self._download_webpage( | ||||
|                 'https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php', | ||||
|                 playlist_id, note='Downloading player', query=query) | ||||
| 
 | ||||
|         NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.' | ||||
|         if '%s</p>' % NOT_AVAILABLE_STRING in webpage: | ||||
|             raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) | ||||
|             self.raise_geo_restricted(NOT_AVAILABLE_STRING) | ||||
|         if any(not_found in webpage for not_found in ('Neplatný parametr pro videopřehrávač', 'IDEC nebyl nalezen', )): | ||||
|             raise ExtractorError('no video with IDEC available', video_id=idec, expected=True) | ||||
| 
 | ||||
|         type_ = None | ||||
|         episode_id = None | ||||
| @@ -174,7 +200,6 @@ class CeskaTelevizeIE(InfoExtractor): | ||||
|                 is_live = item.get('type') == 'LIVE' | ||||
|                 formats = [] | ||||
|                 for format_id, stream_url in item.get('streamUrls', {}).items(): | ||||
|                     stream_url = stream_url.replace('https://', 'http://') | ||||
|                     if 'playerType=flash' in stream_url: | ||||
|                         stream_formats = self._extract_m3u8_formats( | ||||
|                             stream_url, playlist_id, 'mp4', 'm3u8_native', | ||||
| @@ -196,7 +221,7 @@ class CeskaTelevizeIE(InfoExtractor): | ||||
|                     entries[num]['formats'].extend(formats) | ||||
|                     continue | ||||
| 
 | ||||
|                 item_id = item.get('id') or item['assetId'] | ||||
|                 item_id = str_or_none(item.get('id') or item['assetId']) | ||||
|                 title = item['title'] | ||||
| 
 | ||||
|                 duration = float_or_none(item.get('duration')) | ||||
| @@ -227,6 +252,8 @@ class CeskaTelevizeIE(InfoExtractor): | ||||
|         for e in entries: | ||||
|             self._sort_formats(e['formats']) | ||||
| 
 | ||||
|         if len(entries) == 1: | ||||
|             return entries[0] | ||||
|         return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) | ||||
| 
 | ||||
|     def _get_subtitles(self, episode_id, subs): | ||||
|   | ||||
| @@ -1,8 +1,12 @@ | ||||
| import re | ||||
| 
 | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     extract_attributes, | ||||
|     int_or_none, | ||||
|     str_to_int, | ||||
|     url_or_none, | ||||
|     urlencode_postdata, | ||||
| ) | ||||
| 
 | ||||
| @@ -17,17 +21,20 @@ class ManyVidsIE(InfoExtractor): | ||||
|             'id': '133957', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'everthing about me (Preview)', | ||||
|             'uploader': 'ellyxxix', | ||||
|             'view_count': int, | ||||
|             'like_count': int, | ||||
|         }, | ||||
|     }, { | ||||
|         # full video | ||||
|         'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/', | ||||
|         'md5': 'f3e8f7086409e9b470e2643edb96bdcc', | ||||
|         'md5': 'bb47bab0e0802c2a60c24ef079dfe60f', | ||||
|         'info_dict': { | ||||
|             'id': '935718', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'MY FACE REVEAL', | ||||
|             'description': 'md5:ec5901d41808b3746fed90face161612', | ||||
|             'uploader': 'Sarah Calanthe', | ||||
|             'view_count': int, | ||||
|             'like_count': int, | ||||
|         }, | ||||
| @@ -36,17 +43,50 @@ class ManyVidsIE(InfoExtractor): | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
| 
 | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         real_url = 'https://www.manyvids.com/video/%s/gtm.js' % (video_id, ) | ||||
|         try: | ||||
|             webpage = self._download_webpage(real_url, video_id) | ||||
|         except Exception: | ||||
|             # probably useless fallback | ||||
|             webpage = self._download_webpage(url, video_id) | ||||
| 
 | ||||
|         video_url = self._search_regex( | ||||
|             r'data-(?:video-filepath|meta-video)\s*=s*(["\'])(?P<url>(?:(?!\1).)+)\1', | ||||
|             webpage, 'video URL', group='url') | ||||
|         info = self._search_regex( | ||||
|             r'''(<div\b[^>]*\bid\s*=\s*(['"])pageMetaDetails\2[^>]*>)''', | ||||
|             webpage, 'meta details', default='') | ||||
|         info = extract_attributes(info) | ||||
| 
 | ||||
|         title = self._html_search_regex( | ||||
|             (r'<span[^>]+class=["\']item-title[^>]+>([^<]+)', | ||||
|              r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'), | ||||
|             webpage, 'title', default=None) or self._html_search_meta( | ||||
|             'twitter:title', webpage, 'title', fatal=True) | ||||
|         player = self._search_regex( | ||||
|             r'''(<div\b[^>]*\bid\s*=\s*(['"])rmpPlayerStream\2[^>]*>)''', | ||||
|             webpage, 'player details', default='') | ||||
|         player = extract_attributes(player) | ||||
| 
 | ||||
|         video_urls_and_ids = ( | ||||
|             (info.get('data-meta-video'), 'video'), | ||||
|             (player.get('data-video-transcoded'), 'transcoded'), | ||||
|             (player.get('data-video-filepath'), 'filepath'), | ||||
|             (self._og_search_video_url(webpage, secure=False, default=None), 'og_video'), | ||||
|         ) | ||||
| 
 | ||||
|         def txt_or_none(s, default=None): | ||||
|             return (s.strip() or default) if isinstance(s, compat_str) else default | ||||
| 
 | ||||
|         uploader = txt_or_none(info.get('data-meta-author')) | ||||
| 
 | ||||
|         def mung_title(s): | ||||
|             if uploader: | ||||
|                 s = re.sub(r'^\s*%s\s+[|-]' % (re.escape(uploader), ), '', s) | ||||
|             return txt_or_none(s) | ||||
| 
 | ||||
|         title = ( | ||||
|             mung_title(info.get('data-meta-title')) | ||||
|             or self._html_search_regex( | ||||
|                 (r'<span[^>]+class=["\']item-title[^>]+>([^<]+)', | ||||
|                  r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'), | ||||
|                 webpage, 'title', default=None) | ||||
|             or self._html_search_meta( | ||||
|                 'twitter:title', webpage, 'title', fatal=True)) | ||||
| 
 | ||||
|         title = re.sub(r'\s*[|-]\s+ManyVids\s*$', '', title) or title | ||||
| 
 | ||||
|         if any(p in webpage for p in ('preview_videos', '_preview.mp4')): | ||||
|             title += ' (Preview)' | ||||
| @@ -59,7 +99,8 @@ class ManyVidsIE(InfoExtractor): | ||||
|             # Sets some cookies | ||||
|             self._download_webpage( | ||||
|                 'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php', | ||||
|                 video_id, fatal=False, data=urlencode_postdata({ | ||||
|                 video_id, note='Setting format cookies', fatal=False, | ||||
|                 data=urlencode_postdata({ | ||||
|                     'mvtoken': mv_token, | ||||
|                     'vid': video_id, | ||||
|                 }), headers={ | ||||
| @@ -67,24 +108,56 @@ class ManyVidsIE(InfoExtractor): | ||||
|                     'X-Requested-With': 'XMLHttpRequest' | ||||
|                 }) | ||||
| 
 | ||||
|         if determine_ext(video_url) == 'm3u8': | ||||
|             formats = self._extract_m3u8_formats( | ||||
|                 video_url, video_id, 'mp4', entry_protocol='m3u8_native', | ||||
|                 m3u8_id='hls') | ||||
|         else: | ||||
|             formats = [{'url': video_url}] | ||||
|         formats = [] | ||||
|         for v_url, fmt in video_urls_and_ids: | ||||
|             v_url = url_or_none(v_url) | ||||
|             if not v_url: | ||||
|                 continue | ||||
|             if determine_ext(v_url) == 'm3u8': | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     v_url, video_id, 'mp4', entry_protocol='m3u8_native', | ||||
|                     m3u8_id='hls')) | ||||
|             else: | ||||
|                 formats.append({ | ||||
|                     'url': v_url, | ||||
|                     'format_id': fmt, | ||||
|                 }) | ||||
| 
 | ||||
|         like_count = int_or_none(self._search_regex( | ||||
|             r'data-likes=["\'](\d+)', webpage, 'like count', default=None)) | ||||
|         view_count = str_to_int(self._html_search_regex( | ||||
|             r'(?s)<span[^>]+class="views-wrapper"[^>]*>(.+?)</span', webpage, | ||||
|             'view count', default=None)) | ||||
|         self._remove_duplicate_formats(formats) | ||||
| 
 | ||||
|         for f in formats: | ||||
|             if f.get('height') is None: | ||||
|                 f['height'] = int_or_none( | ||||
|                     self._search_regex(r'_(\d{2,3}[02468])_', f['url'], 'video height', default=None)) | ||||
|             if '/preview/' in f['url']: | ||||
|                 f['format_id'] = '_'.join(filter(None, (f.get('format_id'), 'preview'))) | ||||
|                 f['preference'] = -10 | ||||
|             if 'transcoded' in f['format_id']: | ||||
|                 f['preference'] = f.get('preference', -1) - 1 | ||||
| 
 | ||||
|         self._sort_formats(formats) | ||||
| 
 | ||||
|         def get_likes(): | ||||
|             likes = self._search_regex( | ||||
|                 r'''(<a\b[^>]*\bdata-id\s*=\s*(['"])%s\2[^>]*>)''' % (video_id, ), | ||||
|                 webpage, 'likes', default='') | ||||
|             likes = extract_attributes(likes) | ||||
|             return int_or_none(likes.get('data-likes')) | ||||
| 
 | ||||
|         def get_views(): | ||||
|             return str_to_int(self._html_search_regex( | ||||
|                 r'''(?s)<span\b[^>]*\bclass\s*=["']views-wrapper\b[^>]+>.+?<span\b[^>]+>\s*(\d[\d,.]*)\s*</span>''', | ||||
|                 webpage, 'view count', default=None)) | ||||
| 
 | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'view_count': view_count, | ||||
|             'like_count': like_count, | ||||
|             'formats': formats, | ||||
|             'uploader': self._html_search_regex(r'<meta[^>]+name="author"[^>]*>([^<]+)', webpage, 'uploader'), | ||||
|             'description': txt_or_none(info.get('data-meta-description')), | ||||
|             'uploader': txt_or_none(info.get('data-meta-author')), | ||||
|             'thumbnail': ( | ||||
|                 url_or_none(info.get('data-meta-image')) | ||||
|                 or url_or_none(player.get('data-video-screenshot'))), | ||||
|             'view_count': get_views(), | ||||
|             'like_count': get_likes(), | ||||
|         } | ||||
|   | ||||
| @@ -69,7 +69,7 @@ class MotherlessIE(InfoExtractor): | ||||
|             'title': 'a/ Hot Teens', | ||||
|             'categories': list, | ||||
|             'upload_date': '20210104', | ||||
|             'uploader_id': 'yonbiw', | ||||
|             'uploader_id': 'anonymous', | ||||
|             'thumbnail': r're:https?://.*\.jpg', | ||||
|             'age_limit': 18, | ||||
|         }, | ||||
| @@ -123,11 +123,12 @@ class MotherlessIE(InfoExtractor): | ||||
|                 kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta} | ||||
|                 upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d') | ||||
| 
 | ||||
|         comment_count = webpage.count('class="media-comment-contents"') | ||||
|         comment_count = len(re.findall(r'''class\s*=\s*['"]media-comment-contents\b''', webpage)) | ||||
|         uploader_id = self._html_search_regex( | ||||
|             (r'"media-meta-member">\s+<a href="/m/([^"]+)"', | ||||
|              r'<span\b[^>]+\bclass="username">([^<]+)</span>'), | ||||
|             (r'''<span\b[^>]+\bclass\s*=\s*["']username\b[^>]*>([^<]+)</span>''', | ||||
|              r'''(?s)['"](?:media-meta-member|thumb-member-username)\b[^>]+>\s*<a\b[^>]+\bhref\s*=\s*['"]/m/([^"']+)'''), | ||||
|             webpage, 'uploader_id', fatal=False) | ||||
| 
 | ||||
|         categories = self._html_search_meta('keywords', webpage, default=None) | ||||
|         if categories: | ||||
|             categories = [cat.strip() for cat in categories.split(',')] | ||||
| @@ -217,23 +218,23 @@ class MotherlessGroupIE(InfoExtractor): | ||||
|             r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False) | ||||
|         description = self._html_search_meta( | ||||
|             'description', webpage, fatal=False) | ||||
|         page_count = self._int(self._search_regex( | ||||
|             r'(\d+)</(?:a|span)><(?:a|span)[^>]+rel="next">', | ||||
|             webpage, 'page_count', default=0), 'page_count') | ||||
|         page_count = str_to_int(self._search_regex( | ||||
|             r'(\d+)\s*</(?:a|span)>\s*<(?:a|span)[^>]+(?:>\s*NEXT|\brel\s*=\s*["\']?next)\b', | ||||
|             webpage, 'page_count', default=0)) | ||||
|         if not page_count: | ||||
|             message = self._search_regex( | ||||
|                 r'class="error-page"[^>]*>\s*<p[^>]*>\s*(?P<error_msg>[^<]+)(?<=\S)\s*', | ||||
|                 r'''class\s*=\s*['"]error-page\b[^>]*>\s*<p[^>]*>\s*(?P<error_msg>[^<]+)(?<=\S)\s*''', | ||||
|                 webpage, 'error_msg', default=None) or 'This group has no videos.' | ||||
|             self.report_warning(message, group_id) | ||||
|             page_count = 1 | ||||
|         PAGE_SIZE = 80 | ||||
| 
 | ||||
|         def _get_page(idx): | ||||
|             if not page_count: | ||||
|                 return | ||||
|             webpage = self._download_webpage( | ||||
|                 page_url, group_id, query={'page': idx + 1}, | ||||
|                 note='Downloading page %d/%d' % (idx + 1, page_count) | ||||
|             ) | ||||
|             if idx > 0: | ||||
|                 webpage = self._download_webpage( | ||||
|                     page_url, group_id, query={'page': idx + 1}, | ||||
|                     note='Downloading page %d/%d' % (idx + 1, page_count) | ||||
|                 ) | ||||
|             for entry in self._extract_entries(webpage, url): | ||||
|                 yield entry | ||||
| 
 | ||||
|   | ||||
| @@ -1,12 +1,25 @@ | ||||
| import itertools | ||||
| import json | ||||
| import re | ||||
| import time | ||||
| from base64 import b64encode | ||||
| from binascii import hexlify | ||||
| from datetime import datetime | ||||
| from hashlib import md5 | ||||
| from random import randint | ||||
| 
 | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str, compat_urllib_parse_urlencode | ||||
| from ..utils import float_or_none, sanitized_Request | ||||
| from ..aes import aes_ecb_encrypt, pkcs7_padding | ||||
| from ..compat import compat_urllib_parse_urlencode | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     bytes_to_intlist, | ||||
|     error_to_compat_str, | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
|     intlist_to_bytes, | ||||
|     sanitized_Request, | ||||
|     try_get, | ||||
| ) | ||||
| 
 | ||||
| 
 | ||||
| class NetEaseMusicBaseIE(InfoExtractor): | ||||
| @@ -17,7 +30,7 @@ class NetEaseMusicBaseIE(InfoExtractor): | ||||
|     @classmethod | ||||
|     def _encrypt(cls, dfsid): | ||||
|         salt_bytes = bytearray(cls._NETEASE_SALT.encode('utf-8')) | ||||
|         string_bytes = bytearray(compat_str(dfsid).encode('ascii')) | ||||
|         string_bytes = bytearray(str(dfsid).encode('ascii')) | ||||
|         salt_len = len(salt_bytes) | ||||
|         for i in range(len(string_bytes)): | ||||
|             string_bytes[i] = string_bytes[i] ^ salt_bytes[i % salt_len] | ||||
| @@ -26,32 +39,106 @@ class NetEaseMusicBaseIE(InfoExtractor): | ||||
|         result = b64encode(m.digest()).decode('ascii') | ||||
|         return result.replace('/', '_').replace('+', '-') | ||||
| 
 | ||||
|     @classmethod | ||||
|     def make_player_api_request_data_and_headers(cls, song_id, bitrate): | ||||
|         KEY = b'e82ckenh8dichen8' | ||||
|         URL = '/api/song/enhance/player/url' | ||||
|         now = int(time.time() * 1000) | ||||
|         rand = randint(0, 1000) | ||||
|         cookie = { | ||||
|             'osver': None, | ||||
|             'deviceId': None, | ||||
|             'appver': '8.0.0', | ||||
|             'versioncode': '140', | ||||
|             'mobilename': None, | ||||
|             'buildver': '1623435496', | ||||
|             'resolution': '1920x1080', | ||||
|             '__csrf': '', | ||||
|             'os': 'pc', | ||||
|             'channel': None, | ||||
|             'requestId': '{0}_{1:04}'.format(now, rand), | ||||
|         } | ||||
|         request_text = json.dumps( | ||||
|             {'ids': '[{0}]'.format(song_id), 'br': bitrate, 'header': cookie}, | ||||
|             separators=(',', ':')) | ||||
|         message = 'nobody{0}use{1}md5forencrypt'.format( | ||||
|             URL, request_text).encode('latin1') | ||||
|         msg_digest = md5(message).hexdigest() | ||||
| 
 | ||||
|         data = '{0}-36cd479b6b5-{1}-36cd479b6b5-{2}'.format( | ||||
|             URL, request_text, msg_digest) | ||||
|         data = pkcs7_padding(bytes_to_intlist(data)) | ||||
|         encrypted = intlist_to_bytes(aes_ecb_encrypt(data, bytes_to_intlist(KEY))) | ||||
|         encrypted_params = hexlify(encrypted).decode('ascii').upper() | ||||
| 
 | ||||
|         cookie = '; '.join( | ||||
|             ['{0}={1}'.format(k, v if v is not None else 'undefined') | ||||
|              for [k, v] in cookie.items()]) | ||||
| 
 | ||||
|         headers = { | ||||
|             'User-Agent': self.extractor.get_param('http_headers')['User-Agent'], | ||||
|             'Content-Type': 'application/x-www-form-urlencoded', | ||||
|             'Referer': 'https://music.163.com', | ||||
|             'Cookie': cookie, | ||||
|         } | ||||
|         return ('params={0}'.format(encrypted_params), headers) | ||||
| 
 | ||||
|     def _call_player_api(self, song_id, bitrate): | ||||
|         url = 'https://interface3.music.163.com/eapi/song/enhance/player/url' | ||||
|         data, headers = self.make_player_api_request_data_and_headers(song_id, bitrate) | ||||
|         try: | ||||
|             msg = 'empty result' | ||||
|             result = self._download_json( | ||||
|                 url, song_id, data=data.encode('ascii'), headers=headers) | ||||
|             if result: | ||||
|                 return result | ||||
|         except ExtractorError as e: | ||||
|             if type(e.cause) in (ValueError, TypeError): | ||||
|                 # JSON load failure | ||||
|                 raise | ||||
|         except Exception as e: | ||||
|             msg = error_to_compat_str(e) | ||||
|             self.report_warning('%s API call (%s) failed: %s' % ( | ||||
|                 song_id, bitrate, msg)) | ||||
|         return {} | ||||
| 
 | ||||
|     def extract_formats(self, info): | ||||
|         err = 0 | ||||
|         formats = [] | ||||
|         song_id = info['id'] | ||||
|         for song_format in self._FORMATS: | ||||
|             details = info.get(song_format) | ||||
|             if not details: | ||||
|                 continue | ||||
|             song_file_path = '/%s/%s.%s' % ( | ||||
|                 self._encrypt(details['dfsId']), details['dfsId'], details['extension']) | ||||
| 
 | ||||
|             # 203.130.59.9, 124.40.233.182, 115.231.74.139, etc is a reverse proxy-like feature | ||||
|             # from NetEase's CDN provider that can be used if m5.music.126.net does not | ||||
|             # work, especially for users outside of Mainland China | ||||
|             # via: https://github.com/JixunMoe/unblock-163/issues/3#issuecomment-163115880 | ||||
|             for host in ('http://m5.music.126.net', 'http://115.231.74.139/m1.music.126.net', | ||||
|                          'http://124.40.233.182/m1.music.126.net', 'http://203.130.59.9/m1.music.126.net'): | ||||
|                 song_url = host + song_file_path | ||||
|             bitrate = int_or_none(details.get('bitrate')) or 999000 | ||||
|             data = self._call_player_api(song_id, bitrate) | ||||
|             for song in try_get(data, lambda x: x['data'], list) or []: | ||||
|                 song_url = try_get(song, lambda x: x['url']) | ||||
|                 if not song_url: | ||||
|                     continue | ||||
|                 if self._is_valid_url(song_url, info['id'], 'song'): | ||||
|                     formats.append({ | ||||
|                         'url': song_url, | ||||
|                         'ext': details.get('extension'), | ||||
|                         'abr': float_or_none(details.get('bitrate'), scale=1000), | ||||
|                         'abr': float_or_none(song.get('br'), scale=1000), | ||||
|                         'format_id': song_format, | ||||
|                         'filesize': details.get('size'), | ||||
|                         'asr': details.get('sr') | ||||
|                         'filesize': int_or_none(song.get('size')), | ||||
|                         'asr': int_or_none(details.get('sr')), | ||||
|                     }) | ||||
|                     break | ||||
|                 elif err == 0: | ||||
|                     err = try_get(song, lambda x: x['code'], int) | ||||
| 
 | ||||
|         if not formats: | ||||
|             msg = 'No media links found' | ||||
|             if err != 0 and (err < 200 or err >= 400): | ||||
|                 raise ExtractorError( | ||||
|                     '%s (site code %d)' % (msg, err, ), expected=True) | ||||
|             else: | ||||
|                 self.raise_geo_restricted( | ||||
|                     msg + ': probably this video is not available from your location due to geo restriction.', | ||||
|                     countries=['CN']) | ||||
| 
 | ||||
|         return formats | ||||
| 
 | ||||
|     @classmethod | ||||
| @@ -67,33 +154,19 @@ class NetEaseMusicBaseIE(InfoExtractor): | ||||
| class NetEaseMusicIE(NetEaseMusicBaseIE): | ||||
|     IE_NAME = 'netease:song' | ||||
|     IE_DESC = '网易云音乐' | ||||
|     _VALID_URL = r'https?://music\.163\.com/(#/)?song\?id=(?P<id>[0-9]+)' | ||||
|     _VALID_URL = r'https?://(y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://music.163.com/#/song?id=32102397', | ||||
|         'md5': 'f2e97280e6345c74ba9d5677dd5dcb45', | ||||
|         'md5': '3e909614ce09b1ccef4a3eb205441190', | ||||
|         'info_dict': { | ||||
|             'id': '32102397', | ||||
|             'ext': 'mp3', | ||||
|             'title': 'Bad Blood (feat. Kendrick Lamar)', | ||||
|             'title': 'Bad Blood', | ||||
|             'creator': 'Taylor Swift / Kendrick Lamar', | ||||
|             'upload_date': '20150517', | ||||
|             'timestamp': 1431878400, | ||||
|             'description': 'md5:a10a54589c2860300d02e1de821eb2ef', | ||||
|             'upload_date': '20150516', | ||||
|             'timestamp': 1431792000, | ||||
|             'description': 'md5:25fc5f27e47aad975aa6d36382c7833c', | ||||
|         }, | ||||
|         'skip': 'Blocked outside Mainland China', | ||||
|     }, { | ||||
|         'note': 'No lyrics translation.', | ||||
|         'url': 'http://music.163.com/#/song?id=29822014', | ||||
|         'info_dict': { | ||||
|             'id': '29822014', | ||||
|             'ext': 'mp3', | ||||
|             'title': '听见下雨的声音', | ||||
|             'creator': '周杰伦', | ||||
|             'upload_date': '20141225', | ||||
|             'timestamp': 1419523200, | ||||
|             'description': 'md5:a4d8d89f44656af206b7b2555c0bce6c', | ||||
|         }, | ||||
|         'skip': 'Blocked outside Mainland China', | ||||
|     }, { | ||||
|         'note': 'No lyrics.', | ||||
|         'url': 'http://music.163.com/song?id=17241424', | ||||
| @@ -103,9 +176,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE): | ||||
|             'title': 'Opus 28', | ||||
|             'creator': 'Dustin O\'Halloran', | ||||
|             'upload_date': '20080211', | ||||
|             'description': 'md5:f12945b0f6e0365e3b73c5032e1b0ff4', | ||||
|             'timestamp': 1202745600, | ||||
|         }, | ||||
|         'skip': 'Blocked outside Mainland China', | ||||
|     }, { | ||||
|         'note': 'Has translated name.', | ||||
|         'url': 'http://music.163.com/#/song?id=22735043', | ||||
| @@ -119,7 +192,18 @@ class NetEaseMusicIE(NetEaseMusicBaseIE): | ||||
|             'timestamp': 1264608000, | ||||
|             'alt_title': '说出愿望吧(Genie)', | ||||
|         }, | ||||
|         'skip': 'Blocked outside Mainland China', | ||||
|     }, { | ||||
|         'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846', | ||||
|         'md5': '95826c73ea50b1c288b22180ec9e754d', | ||||
|         'info_dict': { | ||||
|             'id': '95670', | ||||
|             'ext': 'mp3', | ||||
|             'title': '国际歌', | ||||
|             'creator': '马备', | ||||
|             'upload_date': '19911130', | ||||
|             'timestamp': 691516800, | ||||
|             'description': 'md5:1ba2f911a2b0aa398479f595224f2141', | ||||
|         }, | ||||
|     }] | ||||
| 
 | ||||
|     def _process_lyrics(self, lyrics_info): | ||||
|   | ||||
| @@ -58,8 +58,7 @@ class NRKBaseIE(InfoExtractor): | ||||
|         return self._download_json( | ||||
|             urljoin('https://psapi.nrk.no/', path), | ||||
|             video_id, note or 'Downloading %s JSON' % item, | ||||
|             fatal=fatal, query=query, | ||||
|             headers={'Accept-Encoding': 'gzip, deflate, br'}) | ||||
|             fatal=fatal, query=query) | ||||
| 
 | ||||
| 
 | ||||
| class NRKIE(NRKBaseIE): | ||||
|   | ||||
| @@ -870,7 +870,7 @@ class VimeoIE(VimeoBaseInfoExtractor): | ||||
| 
 | ||||
|         if '://player.vimeo.com/video/' in url: | ||||
|             config = self._parse_json(self._search_regex( | ||||
|                 r'\bconfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id) | ||||
|                 r'\b(?:playerC|c)onfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id) | ||||
|             if config.get('view') == 4: | ||||
|                 config = self._verify_player_video_password( | ||||
|                     redirect_url, video_id, headers) | ||||
|   | ||||
| @@ -3,13 +3,14 @@ import re | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     NO_DEFAULT, | ||||
|     ExtractorError, | ||||
|     determine_ext, | ||||
|     extract_attributes, | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
|     join_nonempty, | ||||
|     merge_dicts, | ||||
|     NO_DEFAULT, | ||||
|     orderedSet, | ||||
|     parse_codecs, | ||||
|     qualities, | ||||
|     traverse_obj, | ||||
| @@ -188,7 +189,7 @@ class ZDFIE(ZDFBaseIE): | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html', | ||||
|         'md5': '57af4423db0455a3975d2dc4578536bc', | ||||
|         'md5': '1b93bdec7d02fc0b703c5e7687461628', | ||||
|         'info_dict': { | ||||
|             'ext': 'mp4', | ||||
|             'id': 'video_funk_1770473', | ||||
| @@ -250,17 +251,15 @@ class ZDFIE(ZDFBaseIE): | ||||
|         title = content.get('title') or content['teaserHeadline'] | ||||
| 
 | ||||
|         t = content['mainVideoContent']['http://zdf.de/rels/target'] | ||||
| 
 | ||||
|         ptmd_path = t.get('http://zdf.de/rels/streams/ptmd') | ||||
| 
 | ||||
|         ptmd_path = traverse_obj(t, ( | ||||
|             (('streams', 'default'), None), | ||||
|             ('http://zdf.de/rels/streams/ptmd', 'http://zdf.de/rels/streams/ptmd-template') | ||||
|         ), get_all=False) | ||||
|         if not ptmd_path: | ||||
|             ptmd_path = traverse_obj( | ||||
|                 t, ('streams', 'default', 'http://zdf.de/rels/streams/ptmd-template'), | ||||
|                 'http://zdf.de/rels/streams/ptmd-template').replace( | ||||
|                 '{playerId}', 'ngplayer_2_4') | ||||
|             raise ExtractorError('Could not extract ptmd_path') | ||||
| 
 | ||||
|         info = self._extract_ptmd( | ||||
|             urljoin(url, ptmd_path), video_id, player['apiToken'], url) | ||||
|             urljoin(url, ptmd_path.replace('{playerId}', 'ngplayer_2_4')), video_id, player['apiToken'], url) | ||||
| 
 | ||||
|         thumbnails = [] | ||||
|         layouts = try_get( | ||||
| @@ -309,15 +308,16 @@ class ZDFIE(ZDFBaseIE): | ||||
|             'https://zdf-cdn.live.cellular.de/mediathekV2/document/%s' % video_id, | ||||
|             video_id) | ||||
| 
 | ||||
|         document = video['document'] | ||||
| 
 | ||||
|         title = document['titel'] | ||||
|         content_id = document['basename'] | ||||
| 
 | ||||
|         formats = [] | ||||
|         format_urls = set() | ||||
|         for f in document['formitaeten']: | ||||
|             self._extract_format(content_id, formats, format_urls, f) | ||||
|         formitaeten = try_get(video, lambda x: x['document']['formitaeten'], list) | ||||
|         document = formitaeten and video['document'] | ||||
|         if formitaeten: | ||||
|             title = document['titel'] | ||||
|             content_id = document['basename'] | ||||
| 
 | ||||
|             format_urls = set() | ||||
|             for f in formitaeten or []: | ||||
|                 self._extract_format(content_id, formats, format_urls, f) | ||||
|         self._sort_formats(formats) | ||||
| 
 | ||||
|         thumbnails = [] | ||||
| @@ -364,9 +364,9 @@ class ZDFChannelIE(ZDFBaseIE): | ||||
|         'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio', | ||||
|         'info_dict': { | ||||
|             'id': 'das-aktuelle-sportstudio', | ||||
|             'title': 'das aktuelle sportstudio | ZDF', | ||||
|             'title': 'das aktuelle sportstudio', | ||||
|         }, | ||||
|         'playlist_mincount': 23, | ||||
|         'playlist_mincount': 18, | ||||
|     }, { | ||||
|         'url': 'https://www.zdf.de/dokumentation/planet-e', | ||||
|         'info_dict': { | ||||
| @@ -374,6 +374,14 @@ class ZDFChannelIE(ZDFBaseIE): | ||||
|             'title': 'planet e.', | ||||
|         }, | ||||
|         'playlist_mincount': 50, | ||||
|     }, { | ||||
|         'url': 'https://www.zdf.de/gesellschaft/aktenzeichen-xy-ungeloest', | ||||
|         'info_dict': { | ||||
|             'id': 'aktenzeichen-xy-ungeloest', | ||||
|             'title': 'Aktenzeichen XY... ungelöst', | ||||
|             'entries': "lambda x: not any('xy580-fall1-kindermoerder-gesucht-100' in e['url'] for e in x)", | ||||
|         }, | ||||
|         'playlist_mincount': 2, | ||||
|     }, { | ||||
|         'url': 'https://www.zdf.de/filme/taunuskrimi/', | ||||
|         'only_matching': True, | ||||
| @@ -383,60 +391,36 @@ class ZDFChannelIE(ZDFBaseIE): | ||||
|     def suitable(cls, url): | ||||
|         return False if ZDFIE.suitable(url) else super(ZDFChannelIE, cls).suitable(url) | ||||
| 
 | ||||
|     def _og_search_title(self, webpage, fatal=False): | ||||
|         title = super(ZDFChannelIE, self)._og_search_title(webpage, fatal=fatal) | ||||
|         return re.split(r'\s+[-|]\s+ZDF(?:mediathek)?$', title or '')[0] or None | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         channel_id = self._match_id(url) | ||||
| 
 | ||||
|         webpage = self._download_webpage(url, channel_id) | ||||
| 
 | ||||
|         entries = [ | ||||
|             self.url_result(item_url, ie=ZDFIE.ie_key()) | ||||
|             for item_url in orderedSet(re.findall( | ||||
|                 r'data-plusbar-url=["\'](http.+?\.html)', webpage))] | ||||
|         matches = re.finditer( | ||||
|             r'''<div\b[^>]*?\sdata-plusbar-id\s*=\s*(["'])(?P<p_id>[\w-]+)\1[^>]*?\sdata-plusbar-url=\1(?P<url>%s)\1''' % ZDFIE._VALID_URL, | ||||
|             webpage) | ||||
| 
 | ||||
|         return self.playlist_result( | ||||
|             entries, channel_id, self._og_search_title(webpage, fatal=False)) | ||||
|         if self._downloader.params.get('noplaylist', False): | ||||
|             entry = next( | ||||
|                 (self.url_result(m.group('url'), ie=ZDFIE.ie_key()) for m in matches), | ||||
|                 None) | ||||
|             self.to_screen('Downloading just the main video because of --no-playlist') | ||||
|             if entry: | ||||
|                 return entry | ||||
|         else: | ||||
|             self.to_screen('Downloading playlist %s - add --no-playlist to download just the main video' % (channel_id, )) | ||||
| 
 | ||||
|         r""" | ||||
|         player = self._extract_player(webpage, channel_id) | ||||
|         def check_video(m): | ||||
|             v_ref = self._search_regex( | ||||
|                 r'''(<a\b[^>]*?\shref\s*=[^>]+?\sdata-target-id\s*=\s*(["'])%s\2[^>]*>)''' % (m.group('p_id'), ), | ||||
|                 webpage, 'check id', default='') | ||||
|             v_ref = extract_attributes(v_ref) | ||||
|             return v_ref.get('data-target-video-type') != 'novideo' | ||||
| 
 | ||||
|         channel_id = self._search_regex( | ||||
|             r'docId\s*:\s*(["\'])(?P<id>(?!\1).+?)\1', webpage, | ||||
|             'channel id', group='id') | ||||
| 
 | ||||
|         channel = self._call_api( | ||||
|             'https://api.zdf.de/content/documents/%s.json' % channel_id, | ||||
|             player, url, channel_id) | ||||
| 
 | ||||
|         items = [] | ||||
|         for module in channel['module']: | ||||
|             for teaser in try_get(module, lambda x: x['teaser'], list) or []: | ||||
|                 t = try_get( | ||||
|                     teaser, lambda x: x['http://zdf.de/rels/target'], dict) | ||||
|                 if not t: | ||||
|                     continue | ||||
|                 items.extend(try_get( | ||||
|                     t, | ||||
|                     lambda x: x['resultsWithVideo']['http://zdf.de/rels/search/results'], | ||||
|                     list) or []) | ||||
|             items.extend(try_get( | ||||
|                 module, | ||||
|                 lambda x: x['filterRef']['resultsWithVideo']['http://zdf.de/rels/search/results'], | ||||
|                 list) or []) | ||||
| 
 | ||||
|         entries = [] | ||||
|         entry_urls = set() | ||||
|         for item in items: | ||||
|             t = try_get(item, lambda x: x['http://zdf.de/rels/target'], dict) | ||||
|             if not t: | ||||
|                 continue | ||||
|             sharing_url = t.get('http://zdf.de/rels/sharing-url') | ||||
|             if not sharing_url or not isinstance(sharing_url, compat_str): | ||||
|                 continue | ||||
|             if sharing_url in entry_urls: | ||||
|                 continue | ||||
|             entry_urls.add(sharing_url) | ||||
|             entries.append(self.url_result( | ||||
|                 sharing_url, ie=ZDFIE.ie_key(), video_id=t.get('id'))) | ||||
| 
 | ||||
|         return self.playlist_result(entries, channel_id, channel.get('title')) | ||||
|         """ | ||||
|         return self.playlist_from_matches( | ||||
|             (m.group('url') for m in matches if check_video(m)), | ||||
|             channel_id, self._og_search_title(webpage, fatal=False)) | ||||
|   | ||||
| @@ -685,7 +685,8 @@ def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT): | ||||
|             return '\0_' | ||||
|         return char | ||||
| 
 | ||||
|     if restricted and is_id is NO_DEFAULT: | ||||
|     # Replace look-alike Unicode glyphs | ||||
|     if restricted and (is_id is NO_DEFAULT or not is_id): | ||||
|         s = unicodedata.normalize('NFKC', s) | ||||
|     s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)  # Handle timestamps | ||||
|     result = ''.join(map(replace_insane, s)) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 pukkandan
					pukkandan