Mirror of https://github.com/yt-dlp/yt-dlp.git, synced 2025-10-31 14:45:14 +00:00.
			
		
		
		
	Add fatal=False parameter to _download_* functions.
This allows us to simplify the calls in the youtube extractor even further.
This commit is contained in:
		| @@ -154,27 +154,38 @@ class InfoExtractor(object): | ||||
|     def IE_NAME(self): | ||||
|         return type(self).__name__[:-2] | ||||
|  | ||||
|     def _request_webpage(self, url_or_request, video_id, note=None, errnote=None): | ||||
|     def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True): | ||||
|         """ Returns the response handle """ | ||||
|         if note is None: | ||||
|             self.report_download_webpage(video_id) | ||||
|         elif note is not False: | ||||
|             if video_id is None: | ||||
|                 self.to_screen(u'%s' % (note,)) | ||||
|             else: | ||||
|                 self.to_screen(u'%s: %s' % (video_id, note)) | ||||
|         try: | ||||
|             return self._downloader.urlopen(url_or_request) | ||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|             if errnote is None: | ||||
|                 errnote = u'Unable to download webpage' | ||||
|             raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2], cause=err) | ||||
|             errmsg = u'%s: %s' % (errnote, compat_str(err)) | ||||
|             if fatal: | ||||
|                 raise ExtractorError(errmsg, sys.exc_info()[2], cause=err) | ||||
|             else: | ||||
|                 self._downloader.report_warning(errmsg) | ||||
|                 return False | ||||
|  | ||||
|     def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None): | ||||
|     def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True): | ||||
|         """ Returns a tuple (page content as string, URL handle) """ | ||||
|  | ||||
|         # Strip hashes from the URL (#1038) | ||||
|         if isinstance(url_or_request, (compat_str, str)): | ||||
|             url_or_request = url_or_request.partition('#')[0] | ||||
|  | ||||
|         urlh = self._request_webpage(url_or_request, video_id, note, errnote) | ||||
|         urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal) | ||||
|         if urlh is False: | ||||
|             assert not fatal | ||||
|             return False | ||||
|         content_type = urlh.headers.get('Content-Type', '') | ||||
|         webpage_bytes = urlh.read() | ||||
|         m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type) | ||||
| @@ -209,9 +220,14 @@ class InfoExtractor(object): | ||||
|         content = webpage_bytes.decode(encoding, 'replace') | ||||
|         return (content, urlh) | ||||
|  | ||||
|     def _download_webpage(self, url_or_request, video_id, note=None, errnote=None): | ||||
|     def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True): | ||||
|         """ Returns the data of the page as a string """ | ||||
|         return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0] | ||||
|         res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal) | ||||
|         if res is False: | ||||
|             return res | ||||
|         else: | ||||
|             content, _ = res | ||||
|             return content | ||||
|  | ||||
|     def _download_xml(self, url_or_request, video_id, | ||||
|                       note=u'Downloading XML', errnote=u'Unable to download XML'): | ||||
|   | ||||
| @@ -42,19 +42,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | ||||
|     # If True it will raise an error if no login info is provided | ||||
|     _LOGIN_REQUIRED = False | ||||
|  | ||||
|     def report_lang(self): | ||||
|         """Report attempt to set language.""" | ||||
|         self.to_screen(u'Setting language') | ||||
|  | ||||
|     def _set_language(self): | ||||
|         request = compat_urllib_request.Request(self._LANG_URL) | ||||
|         try: | ||||
|             self.report_lang() | ||||
|             self._download_webpage(self._LANG_URL, None, False) | ||||
|         except ExtractorError as err: | ||||
|             self._downloader.report_warning(u'unable to set language: %s' % compat_str(err.cause)) | ||||
|             return False | ||||
|         return True | ||||
|         return bool(self._download_webpage( | ||||
|             self._LANG_URL, None, | ||||
|             note=u'Setting language', errnote='unable to set language', | ||||
|             fatal=False)) | ||||
|  | ||||
|     def _login(self): | ||||
|         (username, password) = self._get_login_info() | ||||
| @@ -64,8 +56,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | ||||
|                 raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True) | ||||
|             return False | ||||
|  | ||||
|         login_page = self._download_webpage(self._LOGIN_URL, None, False, | ||||
|             u'Unable to fetch login page') | ||||
|         login_page = self._download_webpage( | ||||
|             self._LOGIN_URL, None, | ||||
|             note=u'Downloading login page', | ||||
|             errnote=u'unable to fetch login page', fatal=False) | ||||
|         if login_page is False: | ||||
|             return | ||||
|  | ||||
|         galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"', | ||||
|                                   login_page, u'Login GALX parameter') | ||||
| @@ -95,16 +91,16 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | ||||
|         # chokes on unicode | ||||
|         login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items()) | ||||
|         login_data = compat_urllib_parse.urlencode(login_form).encode('ascii') | ||||
|         request = compat_urllib_request.Request(self._LOGIN_URL, login_data) | ||||
|         try: | ||||
|             self.report_login() | ||||
|             login_results = self._download_webpage(request, None, False) | ||||
|  | ||||
|         req = compat_urllib_request.Request(self._LOGIN_URL, login_data) | ||||
|         login_results = self._download_webpage( | ||||
|             req, None, | ||||
|             note=u'Logging in', errnote=u'unable to log in', fatal=False) | ||||
|         if login_results is False: | ||||
|             return False | ||||
|         if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None: | ||||
|             self._downloader.report_warning(u'unable to log in: bad username or password') | ||||
|             return False | ||||
|         except ExtractorError as err: | ||||
|             self._downloader.report_warning(u'unable to log in: %s' % compat_str(err.cause)) | ||||
|             return False | ||||
|         return True | ||||
|  | ||||
|     def _confirm_age(self): | ||||
| @@ -112,9 +108,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | ||||
|             'next_url': '/', | ||||
|             'action_confirm': 'Confirm', | ||||
|         } | ||||
|         request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form)) | ||||
|         self.report_age_confirmation() | ||||
|         self._download_webpage(request, None, False, u'Unable to confirm age') | ||||
|         req = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form)) | ||||
|  | ||||
|         self._download_webpage( | ||||
|             req, None, | ||||
|             note=u'Confirming age', errnote=u'Unable to confirm age') | ||||
|         return True | ||||
|  | ||||
|     def _real_initialize(self): | ||||
| @@ -1736,11 +1734,14 @@ class YoutubeSearchIE(SearchInfoExtractor): | ||||
|  | ||||
|         while (50 * pagenum) < limit: | ||||
|             result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1) | ||||
|             data = self._download_webpage(result_url, u'query "%s"' % query, | ||||
|                 u'Downloading page %s' % pagenum, u'Unable to download API page') | ||||
|             api_response = json.loads(data)['data'] | ||||
|             data_json = self._download_webpage( | ||||
|                 result_url, video_id=u'query "%s"' % query, | ||||
|                 note=u'Downloading page %s' % (pagenum + 1), | ||||
|                 errnote=u'Unable to download API page') | ||||
|             data = json.loads(data_json) | ||||
|             api_response = data['data'] | ||||
|  | ||||
|             if not 'items' in api_response: | ||||
|             if 'items' not in api_response: | ||||
|                 raise ExtractorError(u'[youtube] No video results') | ||||
|  | ||||
|             new_ids = list(video['id'] for video in api_response['items']) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister