mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	Update to ytdl-commit-de39d128
[extractor/ceskatelevize] Back-port extractor from yt-dlp
de39d1281c
Closes #5361, Closes #4634, Closes #5210
			
			
This commit is contained in:
		| @@ -11,7 +11,6 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |||||||
| import base64 | import base64 | ||||||
| 
 | 
 | ||||||
| from yt_dlp.aes import ( | from yt_dlp.aes import ( | ||||||
|     BLOCK_SIZE_BYTES, |  | ||||||
|     aes_cbc_decrypt, |     aes_cbc_decrypt, | ||||||
|     aes_cbc_decrypt_bytes, |     aes_cbc_decrypt_bytes, | ||||||
|     aes_cbc_encrypt, |     aes_cbc_encrypt, | ||||||
| @@ -103,8 +102,7 @@ class TestAES(unittest.TestCase): | |||||||
| 
 | 
 | ||||||
|     def test_ecb_encrypt(self): |     def test_ecb_encrypt(self): | ||||||
|         data = bytes_to_intlist(self.secret_msg) |         data = bytes_to_intlist(self.secret_msg) | ||||||
|         data += [0x08] * (BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES) |         encrypted = intlist_to_bytes(aes_ecb_encrypt(data, self.key)) | ||||||
|         encrypted = intlist_to_bytes(aes_ecb_encrypt(data, self.key, self.iv)) |  | ||||||
|         self.assertEqual( |         self.assertEqual( | ||||||
|             encrypted, |             encrypted, | ||||||
|             b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:') |             b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:') | ||||||
|   | |||||||
| @@ -28,11 +28,23 @@ def aes_cbc_encrypt_bytes(data, key, iv, **kwargs): | |||||||
|     return intlist_to_bytes(aes_cbc_encrypt(*map(bytes_to_intlist, (data, key, iv)), **kwargs)) |     return intlist_to_bytes(aes_cbc_encrypt(*map(bytes_to_intlist, (data, key, iv)), **kwargs)) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | BLOCK_SIZE_BYTES = 16 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| def unpad_pkcs7(data): | def unpad_pkcs7(data): | ||||||
|     return data[:-compat_ord(data[-1])] |     return data[:-compat_ord(data[-1])] | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| BLOCK_SIZE_BYTES = 16 | def pkcs7_padding(data): | ||||||
|  |     """ | ||||||
|  |     PKCS#7 padding | ||||||
|  | 
 | ||||||
|  |     @param {int[]} data        cleartext | ||||||
|  |     @returns {int[]}           padding data | ||||||
|  |     """ | ||||||
|  | 
 | ||||||
|  |     remaining_length = BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES | ||||||
|  |     return data + [remaining_length] * remaining_length | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def pad_block(block, padding_mode): | def pad_block(block, padding_mode): | ||||||
| @@ -64,7 +76,7 @@ def pad_block(block, padding_mode): | |||||||
| 
 | 
 | ||||||
| def aes_ecb_encrypt(data, key, iv=None): | def aes_ecb_encrypt(data, key, iv=None): | ||||||
|     """ |     """ | ||||||
|     Encrypt with aes in ECB mode |     Encrypt with aes in ECB mode. Using PKCS#7 padding | ||||||
| 
 | 
 | ||||||
|     @param {int[]} data        cleartext |     @param {int[]} data        cleartext | ||||||
|     @param {int[]} key         16/24/32-Byte cipher key |     @param {int[]} key         16/24/32-Byte cipher key | ||||||
| @@ -77,8 +89,7 @@ def aes_ecb_encrypt(data, key, iv=None): | |||||||
|     encrypted_data = [] |     encrypted_data = [] | ||||||
|     for i in range(block_count): |     for i in range(block_count): | ||||||
|         block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] |         block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] | ||||||
|         encrypted_data += aes_encrypt(block, expanded_key) |         encrypted_data += aes_encrypt(pkcs7_padding(block), expanded_key) | ||||||
|     encrypted_data = encrypted_data[:len(data)] |  | ||||||
| 
 | 
 | ||||||
|     return encrypted_data |     return encrypted_data | ||||||
| 
 | 
 | ||||||
| @@ -551,5 +562,6 @@ __all__ = [ | |||||||
| 
 | 
 | ||||||
|     'key_expansion', |     'key_expansion', | ||||||
|     'pad_block', |     'pad_block', | ||||||
|  |     'pkcs7_padding', | ||||||
|     'unpad_pkcs7', |     'unpad_pkcs7', | ||||||
| ] | ] | ||||||
|   | |||||||
| @@ -48,6 +48,7 @@ def compat_setenv(key, value, env=os.environ): | |||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| compat_basestring = str | compat_basestring = str | ||||||
|  | compat_casefold = str.casefold | ||||||
| compat_chr = chr | compat_chr = chr | ||||||
| compat_collections_abc = collections.abc | compat_collections_abc = collections.abc | ||||||
| compat_cookiejar = http.cookiejar | compat_cookiejar = http.cookiejar | ||||||
|   | |||||||
| @@ -28,30 +28,34 @@ from ..utils import ( | |||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class ADNIE(InfoExtractor): | class ADNIE(InfoExtractor): | ||||||
|     IE_DESC = 'Anime Digital Network' |     IE_DESC = 'Animation Digital Network' | ||||||
|     _VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)' |     _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.fr/video/[^/]+/(?P<id>\d+)' | ||||||
|     _TEST = { |     _TESTS = [{ | ||||||
|         'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites', |         'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir', | ||||||
|         'md5': '0319c99885ff5547565cacb4f3f9348d', |         'md5': '1c9ef066ceb302c86f80c2b371615261', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '7778', |             'id': '9841', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': 'Blue Exorcist - Kyôto Saga - Episode 1', |             'title': 'Fruits Basket - Episode 1', | ||||||
|             'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5', |             'description': 'md5:14be2f72c3c96809b0ca424b0097d336', | ||||||
|             'series': 'Blue Exorcist - Kyôto Saga', |             'series': 'Fruits Basket', | ||||||
|             'duration': 1467, |             'duration': 1437, | ||||||
|             'release_date': '20170106', |             'release_date': '20190405', | ||||||
|             'comment_count': int, |             'comment_count': int, | ||||||
|             'average_rating': float, |             'average_rating': float, | ||||||
|             'season_number': 2, |             'season_number': 1, | ||||||
|             'episode': 'Début des hostilités', |             'episode': 'À ce soir !', | ||||||
|             'episode_number': 1, |             'episode_number': 1, | ||||||
|         } |         }, | ||||||
|     } |         'skip': 'Only available in region (FR, ...)', | ||||||
|  |     }, { | ||||||
|  |         'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites', | ||||||
|  |         'only_matching': True, | ||||||
|  |     }] | ||||||
| 
 | 
 | ||||||
|     _NETRC_MACHINE = 'animedigitalnetwork' |     _NETRC_MACHINE = 'animationdigitalnetwork' | ||||||
|     _BASE_URL = 'http://animedigitalnetwork.fr' |     _BASE = 'animationdigitalnetwork.fr' | ||||||
|     _API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/' |     _API_BASE_URL = 'https://gw.api.' + _BASE + '/' | ||||||
|     _PLAYER_BASE_URL = _API_BASE_URL + 'player/' |     _PLAYER_BASE_URL = _API_BASE_URL + 'player/' | ||||||
|     _HEADERS = {} |     _HEADERS = {} | ||||||
|     _LOGIN_ERR_MESSAGE = 'Unable to log in' |     _LOGIN_ERR_MESSAGE = 'Unable to log in' | ||||||
| @@ -75,11 +79,11 @@ class ADNIE(InfoExtractor): | |||||||
|         if subtitle_location: |         if subtitle_location: | ||||||
|             enc_subtitles = self._download_webpage( |             enc_subtitles = self._download_webpage( | ||||||
|                 subtitle_location, video_id, 'Downloading subtitles data', |                 subtitle_location, video_id, 'Downloading subtitles data', | ||||||
|                 fatal=False, headers={'Origin': 'https://animedigitalnetwork.fr'}) |                 fatal=False, headers={'Origin': 'https://' + self._BASE}) | ||||||
|         if not enc_subtitles: |         if not enc_subtitles: | ||||||
|             return None |             return None | ||||||
| 
 | 
 | ||||||
|         # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js |         # http://animationdigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js | ||||||
|         dec_subtitles = unpad_pkcs7(aes_cbc_decrypt_bytes( |         dec_subtitles = unpad_pkcs7(aes_cbc_decrypt_bytes( | ||||||
|             compat_b64decode(enc_subtitles[24:]), |             compat_b64decode(enc_subtitles[24:]), | ||||||
|             binascii.unhexlify(self._K + '7fac1178830cfe0c'), |             binascii.unhexlify(self._K + '7fac1178830cfe0c'), | ||||||
|   | |||||||
| @@ -9,6 +9,7 @@ from ..utils import ( | |||||||
|     ExtractorError, |     ExtractorError, | ||||||
|     float_or_none, |     float_or_none, | ||||||
|     sanitized_Request, |     sanitized_Request, | ||||||
|  |     str_or_none, | ||||||
|     traverse_obj, |     traverse_obj, | ||||||
|     urlencode_postdata, |     urlencode_postdata, | ||||||
|     USER_AGENTS, |     USER_AGENTS, | ||||||
| @@ -16,13 +17,13 @@ from ..utils import ( | |||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class CeskaTelevizeIE(InfoExtractor): | class CeskaTelevizeIE(InfoExtractor): | ||||||
|     _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)' |     _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)' | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en', |         'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '61924494877028507', |             'id': '61924494877028507', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': 'Hyde Park Civilizace: Bonus 01 - En', |             'title': 'Bonus 01 - En - Hyde Park Civilizace', | ||||||
|             'description': 'English Subtittles', |             'description': 'English Subtittles', | ||||||
|             'thumbnail': r're:^https?://.*\.jpg', |             'thumbnail': r're:^https?://.*\.jpg', | ||||||
|             'duration': 81.3, |             'duration': 81.3, | ||||||
| @@ -33,18 +34,29 @@ class CeskaTelevizeIE(InfoExtractor): | |||||||
|         }, |         }, | ||||||
|     }, { |     }, { | ||||||
|         # live stream |         # live stream | ||||||
|         'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/', |         'url': 'http://www.ceskatelevize.cz/zive/ct1/', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': 402, |             'id': '102', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', |             'title': r'ČT1 - živé vysílání online', | ||||||
|  |             'description': 'Sledujte živé vysílání kanálu ČT1 online. Vybírat si můžete i z dalších kanálů České televize na kterémkoli z vašich zařízení.', | ||||||
|             'is_live': True, |             'is_live': True, | ||||||
|         }, |         }, | ||||||
|         'params': { |         'params': { | ||||||
|             # m3u8 download |             # m3u8 download | ||||||
|             'skip_download': True, |             'skip_download': True, | ||||||
|         }, |         }, | ||||||
|         'skip': 'Georestricted to Czech Republic', |     }, { | ||||||
|  |         # another | ||||||
|  |         'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/', | ||||||
|  |         'only_matching': True, | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': 402, | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', | ||||||
|  |             'is_live': True, | ||||||
|  |         }, | ||||||
|  |         # 'skip': 'Georestricted to Czech Republic', | ||||||
|     }, { |     }, { | ||||||
|         'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25', |         'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
| @@ -53,21 +65,21 @@ class CeskaTelevizeIE(InfoExtractor): | |||||||
|         'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/', |         'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '215562210900007-bogotart', |             'id': '215562210900007-bogotart', | ||||||
|             'title': 'Queer: Bogotart', |             'title': 'Bogotart - Queer', | ||||||
|             'description': 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti. Připravil Peter Serge Butko', |             'description': 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti', | ||||||
|         }, |         }, | ||||||
|         'playlist': [{ |         'playlist': [{ | ||||||
|             'info_dict': { |             'info_dict': { | ||||||
|                 'id': '61924494877311053', |                 'id': '61924494877311053', | ||||||
|                 'ext': 'mp4', |                 'ext': 'mp4', | ||||||
|                 'title': 'Queer: Bogotart (Varování 18+)', |                 'title': 'Bogotart - Queer (Varování 18+)', | ||||||
|                 'duration': 11.9, |                 'duration': 11.9, | ||||||
|             }, |             }, | ||||||
|         }, { |         }, { | ||||||
|             'info_dict': { |             'info_dict': { | ||||||
|                 'id': '61924494877068022', |                 'id': '61924494877068022', | ||||||
|                 'ext': 'mp4', |                 'ext': 'mp4', | ||||||
|                 'title': 'Queer: Bogotart (Queer)', |                 'title': 'Bogotart - Queer (Queer)', | ||||||
|                 'thumbnail': r're:^https?://.*\.jpg', |                 'thumbnail': r're:^https?://.*\.jpg', | ||||||
|                 'duration': 1558.3, |                 'duration': 1558.3, | ||||||
|             }, |             }, | ||||||
| @@ -84,28 +96,42 @@ class CeskaTelevizeIE(InfoExtractor): | |||||||
| 
 | 
 | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         playlist_id = self._match_id(url) |         playlist_id = self._match_id(url) | ||||||
|         parsed_url = compat_urllib_parse_urlparse(url) |         webpage, urlh = self._download_webpage_handle(url, playlist_id) | ||||||
|         webpage = self._download_webpage(url, playlist_id) |         parsed_url = compat_urllib_parse_urlparse(urlh.geturl()) | ||||||
|         site_name = self._og_search_property('site_name', webpage, fatal=False, default=None) |         site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize') | ||||||
|         playlist_title = self._og_search_title(webpage, default=None) |         playlist_title = self._og_search_title(webpage, default=None) | ||||||
|         if site_name and playlist_title: |         if site_name and playlist_title: | ||||||
|             playlist_title = playlist_title.replace(f' — {site_name}', '', 1) |             playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, 1)[0] | ||||||
|         playlist_description = self._og_search_description(webpage, default=None) |         playlist_description = self._og_search_description(webpage, default=None) | ||||||
|         if playlist_description: |         if playlist_description: | ||||||
|             playlist_description = playlist_description.replace('\xa0', ' ') |             playlist_description = playlist_description.replace('\xa0', ' ') | ||||||
| 
 | 
 | ||||||
|         if parsed_url.path.startswith('/porady/'): |         type_ = 'IDEC' | ||||||
|  |         if re.search(r'(^/porady|/zive)/', parsed_url.path): | ||||||
|             next_data = self._search_nextjs_data(webpage, playlist_id) |             next_data = self._search_nextjs_data(webpage, playlist_id) | ||||||
|  |             if '/zive/' in parsed_url.path: | ||||||
|  |                 idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'liveBroadcast', 'current', 'idec'), get_all=False) | ||||||
|  |             else: | ||||||
|                 idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False) |                 idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False) | ||||||
|  |                 if not idec: | ||||||
|  |                     idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'videobonusDetail', 'bonusId'), get_all=False) | ||||||
|  |                     if idec: | ||||||
|  |                         type_ = 'bonus' | ||||||
|             if not idec: |             if not idec: | ||||||
|                 raise ExtractorError('Failed to find IDEC id') |                 raise ExtractorError('Failed to find IDEC id') | ||||||
|             iframe_hash = self._download_webpage('https://www.ceskatelevize.cz/v-api/iframe-hash/', playlist_id) |             iframe_hash = self._download_webpage( | ||||||
|             webpage = self._download_webpage('https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php', playlist_id, |                 'https://www.ceskatelevize.cz/v-api/iframe-hash/', | ||||||
|                                              query={'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', 'IDEC': idec}) |                 playlist_id, note='Getting IFRAME hash') | ||||||
|  |             query = {'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', type_: idec, } | ||||||
|  |             webpage = self._download_webpage( | ||||||
|  |                 'https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php', | ||||||
|  |                 playlist_id, note='Downloading player', query=query) | ||||||
| 
 | 
 | ||||||
|         NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.' |         NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.' | ||||||
|         if '%s</p>' % NOT_AVAILABLE_STRING in webpage: |         if '%s</p>' % NOT_AVAILABLE_STRING in webpage: | ||||||
|             raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) |             self.raise_geo_restricted(NOT_AVAILABLE_STRING) | ||||||
|  |         if any(not_found in webpage for not_found in ('Neplatný parametr pro videopřehrávač', 'IDEC nebyl nalezen', )): | ||||||
|  |             raise ExtractorError('no video with IDEC available', video_id=idec, expected=True) | ||||||
| 
 | 
 | ||||||
|         type_ = None |         type_ = None | ||||||
|         episode_id = None |         episode_id = None | ||||||
| @@ -174,7 +200,6 @@ class CeskaTelevizeIE(InfoExtractor): | |||||||
|                 is_live = item.get('type') == 'LIVE' |                 is_live = item.get('type') == 'LIVE' | ||||||
|                 formats = [] |                 formats = [] | ||||||
|                 for format_id, stream_url in item.get('streamUrls', {}).items(): |                 for format_id, stream_url in item.get('streamUrls', {}).items(): | ||||||
|                     stream_url = stream_url.replace('https://', 'http://') |  | ||||||
|                     if 'playerType=flash' in stream_url: |                     if 'playerType=flash' in stream_url: | ||||||
|                         stream_formats = self._extract_m3u8_formats( |                         stream_formats = self._extract_m3u8_formats( | ||||||
|                             stream_url, playlist_id, 'mp4', 'm3u8_native', |                             stream_url, playlist_id, 'mp4', 'm3u8_native', | ||||||
| @@ -196,7 +221,7 @@ class CeskaTelevizeIE(InfoExtractor): | |||||||
|                     entries[num]['formats'].extend(formats) |                     entries[num]['formats'].extend(formats) | ||||||
|                     continue |                     continue | ||||||
| 
 | 
 | ||||||
|                 item_id = item.get('id') or item['assetId'] |                 item_id = str_or_none(item.get('id') or item['assetId']) | ||||||
|                 title = item['title'] |                 title = item['title'] | ||||||
| 
 | 
 | ||||||
|                 duration = float_or_none(item.get('duration')) |                 duration = float_or_none(item.get('duration')) | ||||||
| @@ -227,6 +252,8 @@ class CeskaTelevizeIE(InfoExtractor): | |||||||
|         for e in entries: |         for e in entries: | ||||||
|             self._sort_formats(e['formats']) |             self._sort_formats(e['formats']) | ||||||
| 
 | 
 | ||||||
|  |         if len(entries) == 1: | ||||||
|  |             return entries[0] | ||||||
|         return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) |         return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) | ||||||
| 
 | 
 | ||||||
|     def _get_subtitles(self, episode_id, subs): |     def _get_subtitles(self, episode_id, subs): | ||||||
|   | |||||||
| @@ -1,8 +1,12 @@ | |||||||
|  | import re | ||||||
|  | 
 | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     determine_ext, |     determine_ext, | ||||||
|  |     extract_attributes, | ||||||
|     int_or_none, |     int_or_none, | ||||||
|     str_to_int, |     str_to_int, | ||||||
|  |     url_or_none, | ||||||
|     urlencode_postdata, |     urlencode_postdata, | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| @@ -17,17 +21,20 @@ class ManyVidsIE(InfoExtractor): | |||||||
|             'id': '133957', |             'id': '133957', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': 'everthing about me (Preview)', |             'title': 'everthing about me (Preview)', | ||||||
|  |             'uploader': 'ellyxxix', | ||||||
|             'view_count': int, |             'view_count': int, | ||||||
|             'like_count': int, |             'like_count': int, | ||||||
|         }, |         }, | ||||||
|     }, { |     }, { | ||||||
|         # full video |         # full video | ||||||
|         'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/', |         'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/', | ||||||
|         'md5': 'f3e8f7086409e9b470e2643edb96bdcc', |         'md5': 'bb47bab0e0802c2a60c24ef079dfe60f', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '935718', |             'id': '935718', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': 'MY FACE REVEAL', |             'title': 'MY FACE REVEAL', | ||||||
|  |             'description': 'md5:ec5901d41808b3746fed90face161612', | ||||||
|  |             'uploader': 'Sarah Calanthe', | ||||||
|             'view_count': int, |             'view_count': int, | ||||||
|             'like_count': int, |             'like_count': int, | ||||||
|         }, |         }, | ||||||
| @@ -36,17 +43,50 @@ class ManyVidsIE(InfoExtractor): | |||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         video_id = self._match_id(url) |         video_id = self._match_id(url) | ||||||
| 
 | 
 | ||||||
|  |         real_url = 'https://www.manyvids.com/video/%s/gtm.js' % (video_id, ) | ||||||
|  |         try: | ||||||
|  |             webpage = self._download_webpage(real_url, video_id) | ||||||
|  |         except Exception: | ||||||
|  |             # probably useless fallback | ||||||
|             webpage = self._download_webpage(url, video_id) |             webpage = self._download_webpage(url, video_id) | ||||||
| 
 | 
 | ||||||
|         video_url = self._search_regex( |         info = self._search_regex( | ||||||
|             r'data-(?:video-filepath|meta-video)\s*=s*(["\'])(?P<url>(?:(?!\1).)+)\1', |             r'''(<div\b[^>]*\bid\s*=\s*(['"])pageMetaDetails\2[^>]*>)''', | ||||||
|             webpage, 'video URL', group='url') |             webpage, 'meta details', default='') | ||||||
|  |         info = extract_attributes(info) | ||||||
| 
 | 
 | ||||||
|         title = self._html_search_regex( |         player = self._search_regex( | ||||||
|  |             r'''(<div\b[^>]*\bid\s*=\s*(['"])rmpPlayerStream\2[^>]*>)''', | ||||||
|  |             webpage, 'player details', default='') | ||||||
|  |         player = extract_attributes(player) | ||||||
|  | 
 | ||||||
|  |         video_urls_and_ids = ( | ||||||
|  |             (info.get('data-meta-video'), 'video'), | ||||||
|  |             (player.get('data-video-transcoded'), 'transcoded'), | ||||||
|  |             (player.get('data-video-filepath'), 'filepath'), | ||||||
|  |             (self._og_search_video_url(webpage, secure=False, default=None), 'og_video'), | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         def txt_or_none(s, default=None): | ||||||
|  |             return (s.strip() or default) if isinstance(s, compat_str) else default | ||||||
|  | 
 | ||||||
|  |         uploader = txt_or_none(info.get('data-meta-author')) | ||||||
|  | 
 | ||||||
|  |         def mung_title(s): | ||||||
|  |             if uploader: | ||||||
|  |                 s = re.sub(r'^\s*%s\s+[|-]' % (re.escape(uploader), ), '', s) | ||||||
|  |             return txt_or_none(s) | ||||||
|  | 
 | ||||||
|  |         title = ( | ||||||
|  |             mung_title(info.get('data-meta-title')) | ||||||
|  |             or self._html_search_regex( | ||||||
|                 (r'<span[^>]+class=["\']item-title[^>]+>([^<]+)', |                 (r'<span[^>]+class=["\']item-title[^>]+>([^<]+)', | ||||||
|                  r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'), |                  r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'), | ||||||
|             webpage, 'title', default=None) or self._html_search_meta( |                 webpage, 'title', default=None) | ||||||
|             'twitter:title', webpage, 'title', fatal=True) |             or self._html_search_meta( | ||||||
|  |                 'twitter:title', webpage, 'title', fatal=True)) | ||||||
|  | 
 | ||||||
|  |         title = re.sub(r'\s*[|-]\s+ManyVids\s*$', '', title) or title | ||||||
| 
 | 
 | ||||||
|         if any(p in webpage for p in ('preview_videos', '_preview.mp4')): |         if any(p in webpage for p in ('preview_videos', '_preview.mp4')): | ||||||
|             title += ' (Preview)' |             title += ' (Preview)' | ||||||
| @@ -59,7 +99,8 @@ class ManyVidsIE(InfoExtractor): | |||||||
|             # Sets some cookies |             # Sets some cookies | ||||||
|             self._download_webpage( |             self._download_webpage( | ||||||
|                 'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php', |                 'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php', | ||||||
|                 video_id, fatal=False, data=urlencode_postdata({ |                 video_id, note='Setting format cookies', fatal=False, | ||||||
|  |                 data=urlencode_postdata({ | ||||||
|                     'mvtoken': mv_token, |                     'mvtoken': mv_token, | ||||||
|                     'vid': video_id, |                     'vid': video_id, | ||||||
|                 }), headers={ |                 }), headers={ | ||||||
| @@ -67,24 +108,56 @@ class ManyVidsIE(InfoExtractor): | |||||||
|                     'X-Requested-With': 'XMLHttpRequest' |                     'X-Requested-With': 'XMLHttpRequest' | ||||||
|                 }) |                 }) | ||||||
| 
 | 
 | ||||||
|         if determine_ext(video_url) == 'm3u8': |         formats = [] | ||||||
|             formats = self._extract_m3u8_formats( |         for v_url, fmt in video_urls_and_ids: | ||||||
|                 video_url, video_id, 'mp4', entry_protocol='m3u8_native', |             v_url = url_or_none(v_url) | ||||||
|                 m3u8_id='hls') |             if not v_url: | ||||||
|  |                 continue | ||||||
|  |             if determine_ext(v_url) == 'm3u8': | ||||||
|  |                 formats.extend(self._extract_m3u8_formats( | ||||||
|  |                     v_url, video_id, 'mp4', entry_protocol='m3u8_native', | ||||||
|  |                     m3u8_id='hls')) | ||||||
|             else: |             else: | ||||||
|             formats = [{'url': video_url}] |                 formats.append({ | ||||||
|  |                     'url': v_url, | ||||||
|  |                     'format_id': fmt, | ||||||
|  |                 }) | ||||||
| 
 | 
 | ||||||
|         like_count = int_or_none(self._search_regex( |         self._remove_duplicate_formats(formats) | ||||||
|             r'data-likes=["\'](\d+)', webpage, 'like count', default=None)) | 
 | ||||||
|         view_count = str_to_int(self._html_search_regex( |         for f in formats: | ||||||
|             r'(?s)<span[^>]+class="views-wrapper"[^>]*>(.+?)</span', webpage, |             if f.get('height') is None: | ||||||
|             'view count', default=None)) |                 f['height'] = int_or_none( | ||||||
|  |                     self._search_regex(r'_(\d{2,3}[02468])_', f['url'], 'video height', default=None)) | ||||||
|  |             if '/preview/' in f['url']: | ||||||
|  |                 f['format_id'] = '_'.join(filter(None, (f.get('format_id'), 'preview'))) | ||||||
|  |                 f['preference'] = -10 | ||||||
|  |             if 'transcoded' in f['format_id']: | ||||||
|  |                 f['preference'] = f.get('preference', -1) - 1 | ||||||
|  | 
 | ||||||
|  |         self._sort_formats(formats) | ||||||
|  | 
 | ||||||
|  |         def get_likes(): | ||||||
|  |             likes = self._search_regex( | ||||||
|  |                 r'''(<a\b[^>]*\bdata-id\s*=\s*(['"])%s\2[^>]*>)''' % (video_id, ), | ||||||
|  |                 webpage, 'likes', default='') | ||||||
|  |             likes = extract_attributes(likes) | ||||||
|  |             return int_or_none(likes.get('data-likes')) | ||||||
|  | 
 | ||||||
|  |         def get_views(): | ||||||
|  |             return str_to_int(self._html_search_regex( | ||||||
|  |                 r'''(?s)<span\b[^>]*\bclass\s*=["']views-wrapper\b[^>]+>.+?<span\b[^>]+>\s*(\d[\d,.]*)\s*</span>''', | ||||||
|  |                 webpage, 'view count', default=None)) | ||||||
| 
 | 
 | ||||||
|         return { |         return { | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'title': title, |             'title': title, | ||||||
|             'view_count': view_count, |  | ||||||
|             'like_count': like_count, |  | ||||||
|             'formats': formats, |             'formats': formats, | ||||||
|             'uploader': self._html_search_regex(r'<meta[^>]+name="author"[^>]*>([^<]+)', webpage, 'uploader'), |             'description': txt_or_none(info.get('data-meta-description')), | ||||||
|  |             'uploader': txt_or_none(info.get('data-meta-author')), | ||||||
|  |             'thumbnail': ( | ||||||
|  |                 url_or_none(info.get('data-meta-image')) | ||||||
|  |                 or url_or_none(player.get('data-video-screenshot'))), | ||||||
|  |             'view_count': get_views(), | ||||||
|  |             'like_count': get_likes(), | ||||||
|         } |         } | ||||||
|   | |||||||
| @@ -69,7 +69,7 @@ class MotherlessIE(InfoExtractor): | |||||||
|             'title': 'a/ Hot Teens', |             'title': 'a/ Hot Teens', | ||||||
|             'categories': list, |             'categories': list, | ||||||
|             'upload_date': '20210104', |             'upload_date': '20210104', | ||||||
|             'uploader_id': 'yonbiw', |             'uploader_id': 'anonymous', | ||||||
|             'thumbnail': r're:https?://.*\.jpg', |             'thumbnail': r're:https?://.*\.jpg', | ||||||
|             'age_limit': 18, |             'age_limit': 18, | ||||||
|         }, |         }, | ||||||
| @@ -123,11 +123,12 @@ class MotherlessIE(InfoExtractor): | |||||||
|                 kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta} |                 kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta} | ||||||
|                 upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d') |                 upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d') | ||||||
| 
 | 
 | ||||||
|         comment_count = webpage.count('class="media-comment-contents"') |         comment_count = len(re.findall(r'''class\s*=\s*['"]media-comment-contents\b''', webpage)) | ||||||
|         uploader_id = self._html_search_regex( |         uploader_id = self._html_search_regex( | ||||||
|             (r'"media-meta-member">\s+<a href="/m/([^"]+)"', |             (r'''<span\b[^>]+\bclass\s*=\s*["']username\b[^>]*>([^<]+)</span>''', | ||||||
|              r'<span\b[^>]+\bclass="username">([^<]+)</span>'), |              r'''(?s)['"](?:media-meta-member|thumb-member-username)\b[^>]+>\s*<a\b[^>]+\bhref\s*=\s*['"]/m/([^"']+)'''), | ||||||
|             webpage, 'uploader_id', fatal=False) |             webpage, 'uploader_id', fatal=False) | ||||||
|  | 
 | ||||||
|         categories = self._html_search_meta('keywords', webpage, default=None) |         categories = self._html_search_meta('keywords', webpage, default=None) | ||||||
|         if categories: |         if categories: | ||||||
|             categories = [cat.strip() for cat in categories.split(',')] |             categories = [cat.strip() for cat in categories.split(',')] | ||||||
| @@ -217,19 +218,19 @@ class MotherlessGroupIE(InfoExtractor): | |||||||
|             r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False) |             r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False) | ||||||
|         description = self._html_search_meta( |         description = self._html_search_meta( | ||||||
|             'description', webpage, fatal=False) |             'description', webpage, fatal=False) | ||||||
|         page_count = self._int(self._search_regex( |         page_count = str_to_int(self._search_regex( | ||||||
|             r'(\d+)</(?:a|span)><(?:a|span)[^>]+rel="next">', |             r'(\d+)\s*</(?:a|span)>\s*<(?:a|span)[^>]+(?:>\s*NEXT|\brel\s*=\s*["\']?next)\b', | ||||||
|             webpage, 'page_count', default=0), 'page_count') |             webpage, 'page_count', default=0)) | ||||||
|         if not page_count: |         if not page_count: | ||||||
|             message = self._search_regex( |             message = self._search_regex( | ||||||
|                 r'class="error-page"[^>]*>\s*<p[^>]*>\s*(?P<error_msg>[^<]+)(?<=\S)\s*', |                 r'''class\s*=\s*['"]error-page\b[^>]*>\s*<p[^>]*>\s*(?P<error_msg>[^<]+)(?<=\S)\s*''', | ||||||
|                 webpage, 'error_msg', default=None) or 'This group has no videos.' |                 webpage, 'error_msg', default=None) or 'This group has no videos.' | ||||||
|             self.report_warning(message, group_id) |             self.report_warning(message, group_id) | ||||||
|  |             page_count = 1 | ||||||
|         PAGE_SIZE = 80 |         PAGE_SIZE = 80 | ||||||
| 
 | 
 | ||||||
|         def _get_page(idx): |         def _get_page(idx): | ||||||
|             if not page_count: |             if idx > 0: | ||||||
|                 return |  | ||||||
|                 webpage = self._download_webpage( |                 webpage = self._download_webpage( | ||||||
|                     page_url, group_id, query={'page': idx + 1}, |                     page_url, group_id, query={'page': idx + 1}, | ||||||
|                     note='Downloading page %d/%d' % (idx + 1, page_count) |                     note='Downloading page %d/%d' % (idx + 1, page_count) | ||||||
|   | |||||||
| @@ -1,12 +1,25 @@ | |||||||
| import itertools | import json | ||||||
| import re | import re | ||||||
|  | import time | ||||||
| from base64 import b64encode | from base64 import b64encode | ||||||
|  | from binascii import hexlify | ||||||
| from datetime import datetime | from datetime import datetime | ||||||
| from hashlib import md5 | from hashlib import md5 | ||||||
|  | from random import randint | ||||||
| 
 | 
 | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..compat import compat_str, compat_urllib_parse_urlencode | from ..aes import aes_ecb_encrypt, pkcs7_padding | ||||||
| from ..utils import float_or_none, sanitized_Request | from ..compat import compat_urllib_parse_urlencode | ||||||
|  | from ..utils import ( | ||||||
|  |     ExtractorError, | ||||||
|  |     bytes_to_intlist, | ||||||
|  |     error_to_compat_str, | ||||||
|  |     float_or_none, | ||||||
|  |     int_or_none, | ||||||
|  |     intlist_to_bytes, | ||||||
|  |     sanitized_Request, | ||||||
|  |     try_get, | ||||||
|  | ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class NetEaseMusicBaseIE(InfoExtractor): | class NetEaseMusicBaseIE(InfoExtractor): | ||||||
| @@ -17,7 +30,7 @@ class NetEaseMusicBaseIE(InfoExtractor): | |||||||
|     @classmethod |     @classmethod | ||||||
|     def _encrypt(cls, dfsid): |     def _encrypt(cls, dfsid): | ||||||
|         salt_bytes = bytearray(cls._NETEASE_SALT.encode('utf-8')) |         salt_bytes = bytearray(cls._NETEASE_SALT.encode('utf-8')) | ||||||
|         string_bytes = bytearray(compat_str(dfsid).encode('ascii')) |         string_bytes = bytearray(str(dfsid).encode('ascii')) | ||||||
|         salt_len = len(salt_bytes) |         salt_len = len(salt_bytes) | ||||||
|         for i in range(len(string_bytes)): |         for i in range(len(string_bytes)): | ||||||
|             string_bytes[i] = string_bytes[i] ^ salt_bytes[i % salt_len] |             string_bytes[i] = string_bytes[i] ^ salt_bytes[i % salt_len] | ||||||
| @@ -26,32 +39,106 @@ class NetEaseMusicBaseIE(InfoExtractor): | |||||||
|         result = b64encode(m.digest()).decode('ascii') |         result = b64encode(m.digest()).decode('ascii') | ||||||
|         return result.replace('/', '_').replace('+', '-') |         return result.replace('/', '_').replace('+', '-') | ||||||
| 
 | 
 | ||||||
|  |     @classmethod | ||||||
|  |     def make_player_api_request_data_and_headers(cls, song_id, bitrate): | ||||||
|  |         KEY = b'e82ckenh8dichen8' | ||||||
|  |         URL = '/api/song/enhance/player/url' | ||||||
|  |         now = int(time.time() * 1000) | ||||||
|  |         rand = randint(0, 1000) | ||||||
|  |         cookie = { | ||||||
|  |             'osver': None, | ||||||
|  |             'deviceId': None, | ||||||
|  |             'appver': '8.0.0', | ||||||
|  |             'versioncode': '140', | ||||||
|  |             'mobilename': None, | ||||||
|  |             'buildver': '1623435496', | ||||||
|  |             'resolution': '1920x1080', | ||||||
|  |             '__csrf': '', | ||||||
|  |             'os': 'pc', | ||||||
|  |             'channel': None, | ||||||
|  |             'requestId': '{0}_{1:04}'.format(now, rand), | ||||||
|  |         } | ||||||
|  |         request_text = json.dumps( | ||||||
|  |             {'ids': '[{0}]'.format(song_id), 'br': bitrate, 'header': cookie}, | ||||||
|  |             separators=(',', ':')) | ||||||
|  |         message = 'nobody{0}use{1}md5forencrypt'.format( | ||||||
|  |             URL, request_text).encode('latin1') | ||||||
|  |         msg_digest = md5(message).hexdigest() | ||||||
|  | 
 | ||||||
|  |         data = '{0}-36cd479b6b5-{1}-36cd479b6b5-{2}'.format( | ||||||
|  |             URL, request_text, msg_digest) | ||||||
|  |         data = pkcs7_padding(bytes_to_intlist(data)) | ||||||
|  |         encrypted = intlist_to_bytes(aes_ecb_encrypt(data, bytes_to_intlist(KEY))) | ||||||
|  |         encrypted_params = hexlify(encrypted).decode('ascii').upper() | ||||||
|  | 
 | ||||||
|  |         cookie = '; '.join( | ||||||
|  |             ['{0}={1}'.format(k, v if v is not None else 'undefined') | ||||||
|  |              for [k, v] in cookie.items()]) | ||||||
|  | 
 | ||||||
|  |         headers = { | ||||||
|  |             'User-Agent': self.extractor.get_param('http_headers')['User-Agent'], | ||||||
|  |             'Content-Type': 'application/x-www-form-urlencoded', | ||||||
|  |             'Referer': 'https://music.163.com', | ||||||
|  |             'Cookie': cookie, | ||||||
|  |         } | ||||||
|  |         return ('params={0}'.format(encrypted_params), headers) | ||||||
|  | 
 | ||||||
|  |     def _call_player_api(self, song_id, bitrate): | ||||||
|  |         url = 'https://interface3.music.163.com/eapi/song/enhance/player/url' | ||||||
|  |         data, headers = self.make_player_api_request_data_and_headers(song_id, bitrate) | ||||||
|  |         try: | ||||||
|  |             msg = 'empty result' | ||||||
|  |             result = self._download_json( | ||||||
|  |                 url, song_id, data=data.encode('ascii'), headers=headers) | ||||||
|  |             if result: | ||||||
|  |                 return result | ||||||
|  |         except ExtractorError as e: | ||||||
|  |             if type(e.cause) in (ValueError, TypeError): | ||||||
|  |                 # JSON load failure | ||||||
|  |                 raise | ||||||
|  |         except Exception as e: | ||||||
|  |             msg = error_to_compat_str(e) | ||||||
|  |             self.report_warning('%s API call (%s) failed: %s' % ( | ||||||
|  |                 song_id, bitrate, msg)) | ||||||
|  |         return {} | ||||||
|  | 
 | ||||||
|     def extract_formats(self, info): |     def extract_formats(self, info): | ||||||
|  |         err = 0 | ||||||
|         formats = [] |         formats = [] | ||||||
|  |         song_id = info['id'] | ||||||
|         for song_format in self._FORMATS: |         for song_format in self._FORMATS: | ||||||
|             details = info.get(song_format) |             details = info.get(song_format) | ||||||
|             if not details: |             if not details: | ||||||
|                 continue |                 continue | ||||||
|             song_file_path = '/%s/%s.%s' % ( |  | ||||||
|                 self._encrypt(details['dfsId']), details['dfsId'], details['extension']) |  | ||||||
| 
 | 
 | ||||||
|             # 203.130.59.9, 124.40.233.182, 115.231.74.139, etc is a reverse proxy-like feature |             bitrate = int_or_none(details.get('bitrate')) or 999000 | ||||||
|             # from NetEase's CDN provider that can be used if m5.music.126.net does not |             data = self._call_player_api(song_id, bitrate) | ||||||
|             # work, especially for users outside of Mainland China |             for song in try_get(data, lambda x: x['data'], list) or []: | ||||||
|             # via: https://github.com/JixunMoe/unblock-163/issues/3#issuecomment-163115880 |                 song_url = try_get(song, lambda x: x['url']) | ||||||
|             for host in ('http://m5.music.126.net', 'http://115.231.74.139/m1.music.126.net', |                 if not song_url: | ||||||
|                          'http://124.40.233.182/m1.music.126.net', 'http://203.130.59.9/m1.music.126.net'): |                     continue | ||||||
|                 song_url = host + song_file_path |  | ||||||
|                 if self._is_valid_url(song_url, info['id'], 'song'): |                 if self._is_valid_url(song_url, info['id'], 'song'): | ||||||
|                     formats.append({ |                     formats.append({ | ||||||
|                         'url': song_url, |                         'url': song_url, | ||||||
|                         'ext': details.get('extension'), |                         'ext': details.get('extension'), | ||||||
|                         'abr': float_or_none(details.get('bitrate'), scale=1000), |                         'abr': float_or_none(song.get('br'), scale=1000), | ||||||
|                         'format_id': song_format, |                         'format_id': song_format, | ||||||
|                         'filesize': details.get('size'), |                         'filesize': int_or_none(song.get('size')), | ||||||
|                         'asr': details.get('sr') |                         'asr': int_or_none(details.get('sr')), | ||||||
|                     }) |                     }) | ||||||
|                     break |                 elif err == 0: | ||||||
|  |                     err = try_get(song, lambda x: x['code'], int) | ||||||
|  | 
 | ||||||
|  |         if not formats: | ||||||
|  |             msg = 'No media links found' | ||||||
|  |             if err != 0 and (err < 200 or err >= 400): | ||||||
|  |                 raise ExtractorError( | ||||||
|  |                     '%s (site code %d)' % (msg, err, ), expected=True) | ||||||
|  |             else: | ||||||
|  |                 self.raise_geo_restricted( | ||||||
|  |                     msg + ': probably this video is not available from your location due to geo restriction.', | ||||||
|  |                     countries=['CN']) | ||||||
|  | 
 | ||||||
|         return formats |         return formats | ||||||
| 
 | 
 | ||||||
|     @classmethod |     @classmethod | ||||||
| @@ -67,33 +154,19 @@ class NetEaseMusicBaseIE(InfoExtractor): | |||||||
| class NetEaseMusicIE(NetEaseMusicBaseIE): | class NetEaseMusicIE(NetEaseMusicBaseIE): | ||||||
|     IE_NAME = 'netease:song' |     IE_NAME = 'netease:song' | ||||||
|     IE_DESC = '网易云音乐' |     IE_DESC = '网易云音乐' | ||||||
|     _VALID_URL = r'https?://music\.163\.com/(#/)?song\?id=(?P<id>[0-9]+)' |     _VALID_URL = r'https?://(y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)' | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'http://music.163.com/#/song?id=32102397', |         'url': 'http://music.163.com/#/song?id=32102397', | ||||||
|         'md5': 'f2e97280e6345c74ba9d5677dd5dcb45', |         'md5': '3e909614ce09b1ccef4a3eb205441190', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '32102397', |             'id': '32102397', | ||||||
|             'ext': 'mp3', |             'ext': 'mp3', | ||||||
|             'title': 'Bad Blood (feat. Kendrick Lamar)', |             'title': 'Bad Blood', | ||||||
|             'creator': 'Taylor Swift / Kendrick Lamar', |             'creator': 'Taylor Swift / Kendrick Lamar', | ||||||
|             'upload_date': '20150517', |             'upload_date': '20150516', | ||||||
|             'timestamp': 1431878400, |             'timestamp': 1431792000, | ||||||
|             'description': 'md5:a10a54589c2860300d02e1de821eb2ef', |             'description': 'md5:25fc5f27e47aad975aa6d36382c7833c', | ||||||
|         }, |         }, | ||||||
|         'skip': 'Blocked outside Mainland China', |  | ||||||
|     }, { |  | ||||||
|         'note': 'No lyrics translation.', |  | ||||||
|         'url': 'http://music.163.com/#/song?id=29822014', |  | ||||||
|         'info_dict': { |  | ||||||
|             'id': '29822014', |  | ||||||
|             'ext': 'mp3', |  | ||||||
|             'title': '听见下雨的声音', |  | ||||||
|             'creator': '周杰伦', |  | ||||||
|             'upload_date': '20141225', |  | ||||||
|             'timestamp': 1419523200, |  | ||||||
|             'description': 'md5:a4d8d89f44656af206b7b2555c0bce6c', |  | ||||||
|         }, |  | ||||||
|         'skip': 'Blocked outside Mainland China', |  | ||||||
|     }, { |     }, { | ||||||
|         'note': 'No lyrics.', |         'note': 'No lyrics.', | ||||||
|         'url': 'http://music.163.com/song?id=17241424', |         'url': 'http://music.163.com/song?id=17241424', | ||||||
| @@ -103,9 +176,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE): | |||||||
|             'title': 'Opus 28', |             'title': 'Opus 28', | ||||||
|             'creator': 'Dustin O\'Halloran', |             'creator': 'Dustin O\'Halloran', | ||||||
|             'upload_date': '20080211', |             'upload_date': '20080211', | ||||||
|  |             'description': 'md5:f12945b0f6e0365e3b73c5032e1b0ff4', | ||||||
|             'timestamp': 1202745600, |             'timestamp': 1202745600, | ||||||
|         }, |         }, | ||||||
|         'skip': 'Blocked outside Mainland China', |  | ||||||
|     }, { |     }, { | ||||||
|         'note': 'Has translated name.', |         'note': 'Has translated name.', | ||||||
|         'url': 'http://music.163.com/#/song?id=22735043', |         'url': 'http://music.163.com/#/song?id=22735043', | ||||||
| @@ -119,7 +192,18 @@ class NetEaseMusicIE(NetEaseMusicBaseIE): | |||||||
|             'timestamp': 1264608000, |             'timestamp': 1264608000, | ||||||
|             'alt_title': '说出愿望吧(Genie)', |             'alt_title': '说出愿望吧(Genie)', | ||||||
|         }, |         }, | ||||||
|         'skip': 'Blocked outside Mainland China', |     }, { | ||||||
|  |         'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846', | ||||||
|  |         'md5': '95826c73ea50b1c288b22180ec9e754d', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '95670', | ||||||
|  |             'ext': 'mp3', | ||||||
|  |             'title': '国际歌', | ||||||
|  |             'creator': '马备', | ||||||
|  |             'upload_date': '19911130', | ||||||
|  |             'timestamp': 691516800, | ||||||
|  |             'description': 'md5:1ba2f911a2b0aa398479f595224f2141', | ||||||
|  |         }, | ||||||
|     }] |     }] | ||||||
| 
 | 
 | ||||||
|     def _process_lyrics(self, lyrics_info): |     def _process_lyrics(self, lyrics_info): | ||||||
|   | |||||||
| @@ -58,8 +58,7 @@ class NRKBaseIE(InfoExtractor): | |||||||
|         return self._download_json( |         return self._download_json( | ||||||
|             urljoin('https://psapi.nrk.no/', path), |             urljoin('https://psapi.nrk.no/', path), | ||||||
|             video_id, note or 'Downloading %s JSON' % item, |             video_id, note or 'Downloading %s JSON' % item, | ||||||
|             fatal=fatal, query=query, |             fatal=fatal, query=query) | ||||||
|             headers={'Accept-Encoding': 'gzip, deflate, br'}) |  | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class NRKIE(NRKBaseIE): | class NRKIE(NRKBaseIE): | ||||||
|   | |||||||
| @@ -870,7 +870,7 @@ class VimeoIE(VimeoBaseInfoExtractor): | |||||||
| 
 | 
 | ||||||
|         if '://player.vimeo.com/video/' in url: |         if '://player.vimeo.com/video/' in url: | ||||||
|             config = self._parse_json(self._search_regex( |             config = self._parse_json(self._search_regex( | ||||||
|                 r'\bconfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id) |                 r'\b(?:playerC|c)onfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id) | ||||||
|             if config.get('view') == 4: |             if config.get('view') == 4: | ||||||
|                 config = self._verify_player_video_password( |                 config = self._verify_player_video_password( | ||||||
|                     redirect_url, video_id, headers) |                     redirect_url, video_id, headers) | ||||||
|   | |||||||
| @@ -3,13 +3,14 @@ import re | |||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..compat import compat_str | from ..compat import compat_str | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|  |     NO_DEFAULT, | ||||||
|  |     ExtractorError, | ||||||
|     determine_ext, |     determine_ext, | ||||||
|  |     extract_attributes, | ||||||
|     float_or_none, |     float_or_none, | ||||||
|     int_or_none, |     int_or_none, | ||||||
|     join_nonempty, |     join_nonempty, | ||||||
|     merge_dicts, |     merge_dicts, | ||||||
|     NO_DEFAULT, |  | ||||||
|     orderedSet, |  | ||||||
|     parse_codecs, |     parse_codecs, | ||||||
|     qualities, |     qualities, | ||||||
|     traverse_obj, |     traverse_obj, | ||||||
| @@ -188,7 +189,7 @@ class ZDFIE(ZDFBaseIE): | |||||||
|         }, |         }, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html', |         'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html', | ||||||
|         'md5': '57af4423db0455a3975d2dc4578536bc', |         'md5': '1b93bdec7d02fc0b703c5e7687461628', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'id': 'video_funk_1770473', |             'id': 'video_funk_1770473', | ||||||
| @@ -250,17 +251,15 @@ class ZDFIE(ZDFBaseIE): | |||||||
|         title = content.get('title') or content['teaserHeadline'] |         title = content.get('title') or content['teaserHeadline'] | ||||||
| 
 | 
 | ||||||
|         t = content['mainVideoContent']['http://zdf.de/rels/target'] |         t = content['mainVideoContent']['http://zdf.de/rels/target'] | ||||||
| 
 |         ptmd_path = traverse_obj(t, ( | ||||||
|         ptmd_path = t.get('http://zdf.de/rels/streams/ptmd') |             (('streams', 'default'), None), | ||||||
| 
 |             ('http://zdf.de/rels/streams/ptmd', 'http://zdf.de/rels/streams/ptmd-template') | ||||||
|  |         ), get_all=False) | ||||||
|         if not ptmd_path: |         if not ptmd_path: | ||||||
|             ptmd_path = traverse_obj( |             raise ExtractorError('Could not extract ptmd_path') | ||||||
|                 t, ('streams', 'default', 'http://zdf.de/rels/streams/ptmd-template'), |  | ||||||
|                 'http://zdf.de/rels/streams/ptmd-template').replace( |  | ||||||
|                 '{playerId}', 'ngplayer_2_4') |  | ||||||
| 
 | 
 | ||||||
|         info = self._extract_ptmd( |         info = self._extract_ptmd( | ||||||
|             urljoin(url, ptmd_path), video_id, player['apiToken'], url) |             urljoin(url, ptmd_path.replace('{playerId}', 'ngplayer_2_4')), video_id, player['apiToken'], url) | ||||||
| 
 | 
 | ||||||
|         thumbnails = [] |         thumbnails = [] | ||||||
|         layouts = try_get( |         layouts = try_get( | ||||||
| @@ -309,14 +308,15 @@ class ZDFIE(ZDFBaseIE): | |||||||
|             'https://zdf-cdn.live.cellular.de/mediathekV2/document/%s' % video_id, |             'https://zdf-cdn.live.cellular.de/mediathekV2/document/%s' % video_id, | ||||||
|             video_id) |             video_id) | ||||||
| 
 | 
 | ||||||
|         document = video['document'] |         formats = [] | ||||||
| 
 |         formitaeten = try_get(video, lambda x: x['document']['formitaeten'], list) | ||||||
|  |         document = formitaeten and video['document'] | ||||||
|  |         if formitaeten: | ||||||
|             title = document['titel'] |             title = document['titel'] | ||||||
|             content_id = document['basename'] |             content_id = document['basename'] | ||||||
| 
 | 
 | ||||||
|         formats = [] |  | ||||||
|             format_urls = set() |             format_urls = set() | ||||||
|         for f in document['formitaeten']: |             for f in formitaeten or []: | ||||||
|                 self._extract_format(content_id, formats, format_urls, f) |                 self._extract_format(content_id, formats, format_urls, f) | ||||||
|         self._sort_formats(formats) |         self._sort_formats(formats) | ||||||
| 
 | 
 | ||||||
| @@ -364,9 +364,9 @@ class ZDFChannelIE(ZDFBaseIE): | |||||||
|         'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio', |         'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': 'das-aktuelle-sportstudio', |             'id': 'das-aktuelle-sportstudio', | ||||||
|             'title': 'das aktuelle sportstudio | ZDF', |             'title': 'das aktuelle sportstudio', | ||||||
|         }, |         }, | ||||||
|         'playlist_mincount': 23, |         'playlist_mincount': 18, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://www.zdf.de/dokumentation/planet-e', |         'url': 'https://www.zdf.de/dokumentation/planet-e', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
| @@ -374,6 +374,14 @@ class ZDFChannelIE(ZDFBaseIE): | |||||||
|             'title': 'planet e.', |             'title': 'planet e.', | ||||||
|         }, |         }, | ||||||
|         'playlist_mincount': 50, |         'playlist_mincount': 50, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://www.zdf.de/gesellschaft/aktenzeichen-xy-ungeloest', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': 'aktenzeichen-xy-ungeloest', | ||||||
|  |             'title': 'Aktenzeichen XY... ungelöst', | ||||||
|  |             'entries': "lambda x: not any('xy580-fall1-kindermoerder-gesucht-100' in e['url'] for e in x)", | ||||||
|  |         }, | ||||||
|  |         'playlist_mincount': 2, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://www.zdf.de/filme/taunuskrimi/', |         'url': 'https://www.zdf.de/filme/taunuskrimi/', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
| @@ -383,60 +391,36 @@ class ZDFChannelIE(ZDFBaseIE): | |||||||
|     def suitable(cls, url): |     def suitable(cls, url): | ||||||
|         return False if ZDFIE.suitable(url) else super(ZDFChannelIE, cls).suitable(url) |         return False if ZDFIE.suitable(url) else super(ZDFChannelIE, cls).suitable(url) | ||||||
| 
 | 
 | ||||||
|  |     def _og_search_title(self, webpage, fatal=False): | ||||||
|  |         title = super(ZDFChannelIE, self)._og_search_title(webpage, fatal=fatal) | ||||||
|  |         return re.split(r'\s+[-|]\s+ZDF(?:mediathek)?$', title or '')[0] or None | ||||||
|  | 
 | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         channel_id = self._match_id(url) |         channel_id = self._match_id(url) | ||||||
| 
 | 
 | ||||||
|         webpage = self._download_webpage(url, channel_id) |         webpage = self._download_webpage(url, channel_id) | ||||||
| 
 | 
 | ||||||
|         entries = [ |         matches = re.finditer( | ||||||
|             self.url_result(item_url, ie=ZDFIE.ie_key()) |             r'''<div\b[^>]*?\sdata-plusbar-id\s*=\s*(["'])(?P<p_id>[\w-]+)\1[^>]*?\sdata-plusbar-url=\1(?P<url>%s)\1''' % ZDFIE._VALID_URL, | ||||||
|             for item_url in orderedSet(re.findall( |             webpage) | ||||||
|                 r'data-plusbar-url=["\'](http.+?\.html)', webpage))] |  | ||||||
| 
 | 
 | ||||||
|         return self.playlist_result( |         if self._downloader.params.get('noplaylist', False): | ||||||
|             entries, channel_id, self._og_search_title(webpage, fatal=False)) |             entry = next( | ||||||
|  |                 (self.url_result(m.group('url'), ie=ZDFIE.ie_key()) for m in matches), | ||||||
|  |                 None) | ||||||
|  |             self.to_screen('Downloading just the main video because of --no-playlist') | ||||||
|  |             if entry: | ||||||
|  |                 return entry | ||||||
|  |         else: | ||||||
|  |             self.to_screen('Downloading playlist %s - add --no-playlist to download just the main video' % (channel_id, )) | ||||||
| 
 | 
 | ||||||
|         r""" |         def check_video(m): | ||||||
|         player = self._extract_player(webpage, channel_id) |             v_ref = self._search_regex( | ||||||
|  |                 r'''(<a\b[^>]*?\shref\s*=[^>]+?\sdata-target-id\s*=\s*(["'])%s\2[^>]*>)''' % (m.group('p_id'), ), | ||||||
|  |                 webpage, 'check id', default='') | ||||||
|  |             v_ref = extract_attributes(v_ref) | ||||||
|  |             return v_ref.get('data-target-video-type') != 'novideo' | ||||||
| 
 | 
 | ||||||
|         channel_id = self._search_regex( |         return self.playlist_from_matches( | ||||||
|             r'docId\s*:\s*(["\'])(?P<id>(?!\1).+?)\1', webpage, |             (m.group('url') for m in matches if check_video(m)), | ||||||
|             'channel id', group='id') |             channel_id, self._og_search_title(webpage, fatal=False)) | ||||||
| 
 |  | ||||||
|         channel = self._call_api( |  | ||||||
|             'https://api.zdf.de/content/documents/%s.json' % channel_id, |  | ||||||
|             player, url, channel_id) |  | ||||||
| 
 |  | ||||||
|         items = [] |  | ||||||
|         for module in channel['module']: |  | ||||||
|             for teaser in try_get(module, lambda x: x['teaser'], list) or []: |  | ||||||
|                 t = try_get( |  | ||||||
|                     teaser, lambda x: x['http://zdf.de/rels/target'], dict) |  | ||||||
|                 if not t: |  | ||||||
|                     continue |  | ||||||
|                 items.extend(try_get( |  | ||||||
|                     t, |  | ||||||
|                     lambda x: x['resultsWithVideo']['http://zdf.de/rels/search/results'], |  | ||||||
|                     list) or []) |  | ||||||
|             items.extend(try_get( |  | ||||||
|                 module, |  | ||||||
|                 lambda x: x['filterRef']['resultsWithVideo']['http://zdf.de/rels/search/results'], |  | ||||||
|                 list) or []) |  | ||||||
| 
 |  | ||||||
|         entries = [] |  | ||||||
|         entry_urls = set() |  | ||||||
|         for item in items: |  | ||||||
|             t = try_get(item, lambda x: x['http://zdf.de/rels/target'], dict) |  | ||||||
|             if not t: |  | ||||||
|                 continue |  | ||||||
|             sharing_url = t.get('http://zdf.de/rels/sharing-url') |  | ||||||
|             if not sharing_url or not isinstance(sharing_url, compat_str): |  | ||||||
|                 continue |  | ||||||
|             if sharing_url in entry_urls: |  | ||||||
|                 continue |  | ||||||
|             entry_urls.add(sharing_url) |  | ||||||
|             entries.append(self.url_result( |  | ||||||
|                 sharing_url, ie=ZDFIE.ie_key(), video_id=t.get('id'))) |  | ||||||
| 
 |  | ||||||
|         return self.playlist_result(entries, channel_id, channel.get('title')) |  | ||||||
|         """ |  | ||||||
|   | |||||||
| @@ -685,7 +685,8 @@ def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT): | |||||||
|             return '\0_' |             return '\0_' | ||||||
|         return char |         return char | ||||||
| 
 | 
 | ||||||
|     if restricted and is_id is NO_DEFAULT: |     # Replace look-alike Unicode glyphs | ||||||
|  |     if restricted and (is_id is NO_DEFAULT or not is_id): | ||||||
|         s = unicodedata.normalize('NFKC', s) |         s = unicodedata.normalize('NFKC', s) | ||||||
|     s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)  # Handle timestamps |     s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)  # Handle timestamps | ||||||
|     result = ''.join(map(replace_insane, s)) |     result = ''.join(map(replace_insane, s)) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 pukkandan
					pukkandan