mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	Update to ytdl-commit-9f6c03
[cbsnews] Fix extraction for python <3.6
9f6c03a006
			
			
This commit is contained in:
		| @@ -26,7 +26,7 @@ class CBSNewsEmbedIE(CBSIE): | ||||
|     def _real_extract(self, url): | ||||
|         item = self._parse_json(zlib.decompress(compat_b64decode( | ||||
|             compat_urllib_parse_unquote(self._match_id(url))), | ||||
|             -zlib.MAX_WBITS), None)['video']['items'][0] | ||||
|             -zlib.MAX_WBITS).decode('utf-8'), None)['video']['items'][0] | ||||
|         return self._extract_video_info(item['mpxRefId'], 'cbsnews') | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -122,6 +122,26 @@ class LBRYIE(LBRYBaseIE): | ||||
|             'channel_url': 'https://lbry.tv/@LBRYFoundation:0ed629d2b9c601300cacf7eabe9da0be79010212', | ||||
|             'vcodec': 'none', | ||||
|         } | ||||
|     }, { | ||||
|         # HLS | ||||
|         'url': 'https://odysee.com/@gardeningincanada:b/plants-i-will-never-grow-again.-the:e', | ||||
|         'md5': 'fc82f45ea54915b1495dd7cb5cc1289f', | ||||
|         'info_dict': { | ||||
|             'id': 'e51671357333fe22ae88aad320bde2f6f96b1410', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'PLANTS I WILL NEVER GROW AGAIN. THE BLACK LIST PLANTS FOR A CANADIAN GARDEN | Gardening in Canada 🍁', | ||||
|             'description': 'md5:9c539c6a03fb843956de61a4d5288d5e', | ||||
|             'timestamp': 1618254123, | ||||
|             'upload_date': '20210412', | ||||
|             'release_timestamp': 1618254002, | ||||
|             'release_date': '20210412', | ||||
|             'tags': list, | ||||
|             'duration': 554, | ||||
|             'channel': 'Gardening In Canada', | ||||
|             'channel_id': 'b8be0e93b423dad221abe29545fbe8ec36e806bc', | ||||
|             'channel_url': 'https://odysee.com/@gardeningincanada:b8be0e93b423dad221abe29545fbe8ec36e806bc', | ||||
|             'formats': 'mincount:3', | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'https://odysee.com/@BrodieRobertson:5/apple-is-tracking-everything-you-do-on:e', | ||||
|         'only_matching': True, | ||||
| @@ -168,10 +188,18 @@ class LBRYIE(LBRYBaseIE): | ||||
|         streaming_url = self._call_api_proxy( | ||||
|             'get', claim_id, {'uri': uri}, 'streaming url')['streaming_url'] | ||||
|         info = self._parse_stream(result, url) | ||||
|         urlh = self._request_webpage( | ||||
|             streaming_url, display_id, note='Downloading streaming redirect url info') | ||||
|         if determine_ext(urlh.geturl()) == 'm3u8': | ||||
|             info['formats'] = self._extract_m3u8_formats( | ||||
|                 urlh.geturl(), display_id, 'mp4', entry_protocol='m3u8_native', | ||||
|                 m3u8_id='hls') | ||||
|             self._sort_formats(info['formats']) | ||||
|         else: | ||||
|             info['url'] = streaming_url | ||||
|         info.update({ | ||||
|             'id': claim_id, | ||||
|             'title': title, | ||||
|             'url': streaming_url, | ||||
|         }) | ||||
|         return info | ||||
|  | ||||
|   | ||||
| @@ -393,7 +393,7 @@ query viewClip { | ||||
|                 # To somewhat reduce the probability of these consequences | ||||
|                 # we will sleep random amount of time before each call to ViewClip. | ||||
|                 self._sleep( | ||||
|                     random.randint(2, 5), display_id, | ||||
|                     random.randint(5, 10), display_id, | ||||
|                     '%(video_id)s: Waiting for %(timeout)s seconds to avoid throttling') | ||||
|  | ||||
|                 if not viewclip: | ||||
|   | ||||
| @@ -53,6 +53,10 @@ from ..utils import ( | ||||
| ) | ||||
|  | ||||
|  | ||||
| def parse_qs(url): | ||||
|     return compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) | ||||
|  | ||||
|  | ||||
| class YoutubeBaseInfoExtractor(InfoExtractor): | ||||
|     """Provide base functions for Youtube extractors""" | ||||
|     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin' | ||||
| @@ -438,14 +442,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|         r'(?:(?:www|dev)\.)?invidio\.us', | ||||
|         # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md | ||||
|         r'(?:www\.)?invidious\.pussthecat\.org', | ||||
|         r'(?:www\.)?invidious\.048596\.xyz', | ||||
|         r'(?:www\.)?invidious\.zee\.li', | ||||
|         r'(?:www\.)?vid\.puffyan\.us', | ||||
|         r'(?:(?:www|au)\.)?ytprivate\.com', | ||||
|         r'(?:www\.)?invidious\.namazso\.eu', | ||||
|         r'(?:www\.)?invidious\.ethibox\.fr', | ||||
|         r'(?:www\.)?inv\.skyn3t\.in', | ||||
|         r'(?:www\.)?invidious\.himiko\.cloud', | ||||
|         r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion', | ||||
|         r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion', | ||||
|         r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion', | ||||
| @@ -454,25 +454,32 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|         r'(?:(?:www|no)\.)?invidiou\.sh', | ||||
|         r'(?:(?:www|fi)\.)?invidious\.snopyta\.org', | ||||
|         r'(?:www\.)?invidious\.kabi\.tk', | ||||
|         r'(?:www\.)?invidious\.13ad\.de', | ||||
|         r'(?:www\.)?invidious\.mastodon\.host', | ||||
|         r'(?:www\.)?invidious\.zapashcanon\.fr', | ||||
|         r'(?:www\.)?invidious\.kavin\.rocks', | ||||
|         r'(?:www\.)?invidious\.tinfoil-hat\.net', | ||||
|         r'(?:www\.)?invidious\.himiko\.cloud', | ||||
|         r'(?:www\.)?invidious\.reallyancient\.tech', | ||||
|         r'(?:www\.)?invidious\.tube', | ||||
|         r'(?:www\.)?invidiou\.site', | ||||
|         r'(?:www\.)?invidious\.site', | ||||
|         r'(?:www\.)?invidious\.xyz', | ||||
|         r'(?:www\.)?invidious\.nixnet\.xyz', | ||||
|         r'(?:www\.)?invidious\.048596\.xyz', | ||||
|         r'(?:www\.)?invidious\.drycat\.fr', | ||||
|         r'(?:www\.)?inv\.skyn3t\.in', | ||||
|         r'(?:www\.)?tube\.poal\.co', | ||||
|         r'(?:www\.)?tube\.connect\.cafe', | ||||
|         r'(?:www\.)?vid\.wxzm\.sx', | ||||
|         r'(?:www\.)?vid\.mint\.lgbt', | ||||
|         r'(?:www\.)?vid\.puffyan\.us', | ||||
|         r'(?:www\.)?yewtu\.be', | ||||
|         r'(?:www\.)?yt\.elukerio\.org', | ||||
|         r'(?:www\.)?yt\.lelux\.fi', | ||||
|         r'(?:www\.)?invidious\.ggc-project\.de', | ||||
|         r'(?:www\.)?yt\.maisputain\.ovh', | ||||
|         r'(?:www\.)?ytprivate\.com', | ||||
|         r'(?:www\.)?invidious\.13ad\.de', | ||||
|         r'(?:www\.)?invidious\.toot\.koeln', | ||||
|         r'(?:www\.)?invidious\.fdn\.fr', | ||||
|         r'(?:www\.)?watch\.nettohikari\.com', | ||||
| @@ -515,16 +522,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId= | ||||
|                          ) | ||||
|                      )?                                                       # all until now is optional -> you can pass the naked ID | ||||
|                      (?P<id>[0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID | ||||
|                      (?!.*?\blist= | ||||
|                         (?: | ||||
|                             %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE | ||||
|                             WL                                                # WL are handled by the watch later IE | ||||
|                         ) | ||||
|                      ) | ||||
|                      (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID | ||||
|                      (?(1).+)?                                                # if we found the ID, everything can follow | ||||
|                      $""" % { | ||||
|         'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE, | ||||
|         'invidious': '|'.join(_INVIDIOUS_SITES), | ||||
|     } | ||||
|     _PLAYER_INFO_RE = ( | ||||
| @@ -1009,6 +1009,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|             }, | ||||
|             'skip': 'This video does not exist.', | ||||
|         }, | ||||
|         { | ||||
|             # Video with incomplete 'yt:stretch=16:' | ||||
|             'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|         { | ||||
|             # Video licensed under Creative Commons | ||||
|             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA', | ||||
| @@ -1304,6 +1309,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         qs = parse_qs(url) | ||||
|         if qs.get('list', [None])[0]: | ||||
|             return False | ||||
|         return super(YoutubeIE, cls).suitable(url) | ||||
|  | ||||
|     def __init__(self, *args, **kwargs): | ||||
|         super(YoutubeIE, self).__init__(*args, **kwargs) | ||||
|         self._code_cache = {} | ||||
| @@ -2079,15 +2091,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)] | ||||
|         for keyword in keywords: | ||||
|             if keyword.startswith('yt:stretch='): | ||||
|                 stretch_ratio = map( | ||||
|                     lambda x: int_or_none(x, default=0), | ||||
|                     keyword.split('=')[1].split(':')) | ||||
|                 w, h = (list(stretch_ratio) + [0])[:2] | ||||
|                 if w > 0 and h > 0: | ||||
|                     ratio = w / h | ||||
|                     for f in formats: | ||||
|                         if f.get('vcodec') != 'none': | ||||
|                             f['stretched_ratio'] = ratio | ||||
|                 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword) | ||||
|                 if mobj: | ||||
|                     # NB: float is intentional for forcing float division | ||||
|                     w, h = (float(v) for v in mobj.groups()) | ||||
|                     if w > 0 and h > 0: | ||||
|                         ratio = w / h | ||||
|                         for f in formats: | ||||
|                             if f.get('vcodec') != 'none': | ||||
|                                 f['stretched_ratio'] = ratio | ||||
|                         break | ||||
|  | ||||
|         thumbnails = [] | ||||
|         for container in (video_details, microformat): | ||||
| @@ -2484,6 +2497,15 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): | ||||
|             'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg', | ||||
|             'uploader': 'Игорь Клейнер', | ||||
|         }, | ||||
|     }, { | ||||
|         # playlists, series | ||||
|         'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3', | ||||
|         'playlist_mincount': 5, | ||||
|         'info_dict': { | ||||
|             'id': 'UCYO_jab_esuFRV4b17AJtAw', | ||||
|             'title': '3Blue1Brown - Playlists', | ||||
|             'description': 'md5:e1384e8a133307dd10edee76e875d62f', | ||||
|         }, | ||||
|     }, { | ||||
|         # playlists, singlepage | ||||
|         'url': 'https://www.youtube.com/user/ThirstForScience/playlists', | ||||
| @@ -2790,6 +2812,9 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): | ||||
|             'title': '#cctv9', | ||||
|         }, | ||||
|         'playlist_mincount': 350, | ||||
|     }, { | ||||
|         'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     @classmethod | ||||
| @@ -2813,14 +2838,16 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): | ||||
|     @staticmethod | ||||
|     def _extract_basic_item_renderer(item): | ||||
|         # Modified from _extract_grid_item_renderer | ||||
|         known_renderers = ( | ||||
|             'playlistRenderer', 'videoRenderer', 'channelRenderer', | ||||
|             'gridPlaylistRenderer', 'gridVideoRenderer', 'gridChannelRenderer' | ||||
|         known_basic_renderers = ( | ||||
|             'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer' | ||||
|         ) | ||||
|         for key, renderer in item.items(): | ||||
|             if key not in known_renderers: | ||||
|             if not isinstance(renderer, dict): | ||||
|                 continue | ||||
|             return renderer | ||||
|             elif key in known_basic_renderers: | ||||
|                 return renderer | ||||
|             elif key.startswith('grid') and key.endswith('Renderer'): | ||||
|                 return renderer | ||||
|  | ||||
|     def _grid_entries(self, grid_renderer): | ||||
|         for item in grid_renderer['items']: | ||||
| @@ -2830,7 +2857,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): | ||||
|             if not isinstance(renderer, dict): | ||||
|                 continue | ||||
|             title = try_get( | ||||
|                 renderer, lambda x: x['title']['runs'][0]['text'], compat_str) | ||||
|                 renderer, (lambda x: x['title']['runs'][0]['text'], | ||||
|                            lambda x: x['title']['simpleText']), compat_str) | ||||
|             # playlist | ||||
|             playlist_id = renderer.get('playlistId') | ||||
|             if playlist_id: | ||||
| @@ -2838,10 +2866,12 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): | ||||
|                     'https://www.youtube.com/playlist?list=%s' % playlist_id, | ||||
|                     ie=YoutubeTabIE.ie_key(), video_id=playlist_id, | ||||
|                     video_title=title) | ||||
|                 continue | ||||
|             # video | ||||
|             video_id = renderer.get('videoId') | ||||
|             if video_id: | ||||
|                 yield self._extract_video(renderer) | ||||
|                 continue | ||||
|             # channel | ||||
|             channel_id = renderer.get('channelId') | ||||
|             if channel_id: | ||||
| @@ -2850,6 +2880,17 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): | ||||
|                 yield self.url_result( | ||||
|                     'https://www.youtube.com/channel/%s' % channel_id, | ||||
|                     ie=YoutubeTabIE.ie_key(), video_title=title) | ||||
|                 continue | ||||
|             # generic endpoint URL support | ||||
|             ep_url = urljoin('https://www.youtube.com/', try_get( | ||||
|                 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'], | ||||
|                 compat_str)) | ||||
|             if ep_url: | ||||
|                 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE): | ||||
|                     if ie.suitable(ep_url): | ||||
|                         yield self.url_result( | ||||
|                             ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title) | ||||
|                         break | ||||
|  | ||||
|     def _shelf_entries_from_content(self, shelf_renderer): | ||||
|         content = shelf_renderer.get('content') | ||||
| @@ -3444,7 +3485,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): | ||||
|             url = '%s/videos%s' % (mobj.get('pre'), mobj.get('post') or '') | ||||
|  | ||||
|         # Handle both video/playlist URLs | ||||
|         qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) | ||||
|         qs = parse_qs(url) | ||||
|         video_id = qs.get('v', [None])[0] | ||||
|         playlist_id = qs.get('list', [None])[0] | ||||
|  | ||||
| @@ -3550,12 +3591,16 @@ class YoutubePlaylistIE(InfoExtractor): | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         return False if YoutubeTabIE.suitable(url) else super( | ||||
|             YoutubePlaylistIE, cls).suitable(url) | ||||
|         if YoutubeTabIE.suitable(url): | ||||
|             return False | ||||
|         qs = parse_qs(url) | ||||
|         if qs.get('v', [None])[0]: | ||||
|             return False | ||||
|         return super(YoutubePlaylistIE, cls).suitable(url) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         playlist_id = self._match_id(url) | ||||
|         qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) | ||||
|         qs = parse_qs(url) | ||||
|         if not qs: | ||||
|             qs = {'list': playlist_id} | ||||
|         return self.url_result( | ||||
|   | ||||
| @@ -40,6 +40,7 @@ import zlib | ||||
| from .compat import ( | ||||
|     compat_HTMLParseError, | ||||
|     compat_HTMLParser, | ||||
|     compat_HTTPError, | ||||
|     compat_basestring, | ||||
|     compat_chr, | ||||
|     compat_cookiejar, | ||||
| @@ -2925,12 +2926,60 @@ class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor): | ||||
|  | ||||
|  | ||||
| class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler): | ||||
|     if sys.version_info[0] < 3: | ||||
|         def redirect_request(self, req, fp, code, msg, headers, newurl): | ||||
|             # On python 2 urlh.geturl() may sometimes return redirect URL | ||||
|             # as byte string instead of unicode. This workaround allows | ||||
|             # to force it always return unicode. | ||||
|             return compat_urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, compat_str(newurl)) | ||||
|     """YoutubeDL redirect handler | ||||
|  | ||||
|     The code is based on HTTPRedirectHandler implementation from CPython [1]. | ||||
|  | ||||
|     This redirect handler solves two issues: | ||||
|      - ensures redirect URL is always unicode under python 2 | ||||
|      - introduces support for experimental HTTP response status code | ||||
|        308 Permanent Redirect [2] used by some sites [3] | ||||
|  | ||||
|     1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py | ||||
|     2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308 | ||||
|     3. https://github.com/ytdl-org/youtube-dl/issues/28768 | ||||
|     """ | ||||
|  | ||||
|     http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302 | ||||
|  | ||||
|     def redirect_request(self, req, fp, code, msg, headers, newurl): | ||||
|         """Return a Request or None in response to a redirect. | ||||
|  | ||||
|         This is called by the http_error_30x methods when a | ||||
|         redirection response is received.  If a redirection should | ||||
|         take place, return a new Request to allow http_error_30x to | ||||
|         perform the redirect.  Otherwise, raise HTTPError if no-one | ||||
|         else should try to handle this url.  Return None if you can't | ||||
|         but another Handler might. | ||||
|         """ | ||||
|         m = req.get_method() | ||||
|         if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD") | ||||
|                  or code in (301, 302, 303) and m == "POST")): | ||||
|             raise compat_HTTPError(req.full_url, code, msg, headers, fp) | ||||
|         # Strictly (according to RFC 2616), 301 or 302 in response to | ||||
|         # a POST MUST NOT cause a redirection without confirmation | ||||
|         # from the user (of urllib.request, in this case).  In practice, | ||||
|         # essentially all clients do redirect in this case, so we do | ||||
|         # the same. | ||||
|  | ||||
|         # On python 2 urlh.geturl() may sometimes return redirect URL | ||||
|         # as byte string instead of unicode. This workaround allows | ||||
|         # to force it always return unicode. | ||||
|         if sys.version_info[0] < 3: | ||||
|             newurl = compat_str(newurl) | ||||
|  | ||||
|         # Be conciliant with URIs containing a space.  This is mainly | ||||
|         # redundant with the more complete encoding done in http_error_302(), | ||||
|         # but it is kept for compatibility with other callers. | ||||
|         newurl = newurl.replace(' ', '%20') | ||||
|  | ||||
|         CONTENT_HEADERS = ("content-length", "content-type") | ||||
|         # NB: don't use dict comprehension for python 2.6 compatibility | ||||
|         newheaders = dict((k, v) for k, v in req.headers.items() | ||||
|                           if k.lower() not in CONTENT_HEADERS) | ||||
|         return compat_urllib_request.Request( | ||||
|             newurl, headers=newheaders, origin_req_host=req.origin_req_host, | ||||
|             unverifiable=True) | ||||
|  | ||||
|  | ||||
| def extract_timezone(date_str): | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 pukkandan
					pukkandan