Mirror of https://github.com/yt-dlp/yt-dlp.git, synced 2025-10-31 14:45:14 +00:00
			
		
		
		
Add a `fatal=False` parameter to the `_download_*` functions.
This allows us to simplify the calls in the YouTube extractor even further.
This commit is contained in:
		| @@ -154,27 +154,38 @@ class InfoExtractor(object): | |||||||
|     def IE_NAME(self): |     def IE_NAME(self): | ||||||
|         return type(self).__name__[:-2] |         return type(self).__name__[:-2] | ||||||
|  |  | ||||||
|     def _request_webpage(self, url_or_request, video_id, note=None, errnote=None): |     def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True): | ||||||
|         """ Returns the response handle """ |         """ Returns the response handle """ | ||||||
|         if note is None: |         if note is None: | ||||||
|             self.report_download_webpage(video_id) |             self.report_download_webpage(video_id) | ||||||
|         elif note is not False: |         elif note is not False: | ||||||
|             self.to_screen(u'%s: %s' % (video_id, note)) |             if video_id is None: | ||||||
|  |                 self.to_screen(u'%s' % (note,)) | ||||||
|  |             else: | ||||||
|  |                 self.to_screen(u'%s: %s' % (video_id, note)) | ||||||
|         try: |         try: | ||||||
|             return self._downloader.urlopen(url_or_request) |             return self._downloader.urlopen(url_or_request) | ||||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: |         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||||
|             if errnote is None: |             if errnote is None: | ||||||
|                 errnote = u'Unable to download webpage' |                 errnote = u'Unable to download webpage' | ||||||
|             raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2], cause=err) |             errmsg = u'%s: %s' % (errnote, compat_str(err)) | ||||||
|  |             if fatal: | ||||||
|  |                 raise ExtractorError(errmsg, sys.exc_info()[2], cause=err) | ||||||
|  |             else: | ||||||
|  |                 self._downloader.report_warning(errmsg) | ||||||
|  |                 return False | ||||||
|  |  | ||||||
|     def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None): |     def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True): | ||||||
|         """ Returns a tuple (page content as string, URL handle) """ |         """ Returns a tuple (page content as string, URL handle) """ | ||||||
|  |  | ||||||
|         # Strip hashes from the URL (#1038) |         # Strip hashes from the URL (#1038) | ||||||
|         if isinstance(url_or_request, (compat_str, str)): |         if isinstance(url_or_request, (compat_str, str)): | ||||||
|             url_or_request = url_or_request.partition('#')[0] |             url_or_request = url_or_request.partition('#')[0] | ||||||
|  |  | ||||||
|         urlh = self._request_webpage(url_or_request, video_id, note, errnote) |         urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal) | ||||||
|  |         if urlh is False: | ||||||
|  |             assert not fatal | ||||||
|  |             return False | ||||||
|         content_type = urlh.headers.get('Content-Type', '') |         content_type = urlh.headers.get('Content-Type', '') | ||||||
|         webpage_bytes = urlh.read() |         webpage_bytes = urlh.read() | ||||||
|         m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type) |         m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type) | ||||||
| @@ -209,9 +220,14 @@ class InfoExtractor(object): | |||||||
|         content = webpage_bytes.decode(encoding, 'replace') |         content = webpage_bytes.decode(encoding, 'replace') | ||||||
|         return (content, urlh) |         return (content, urlh) | ||||||
|  |  | ||||||
|     def _download_webpage(self, url_or_request, video_id, note=None, errnote=None): |     def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True): | ||||||
|         """ Returns the data of the page as a string """ |         """ Returns the data of the page as a string """ | ||||||
|         return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0] |         res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal) | ||||||
|  |         if res is False: | ||||||
|  |             return res | ||||||
|  |         else: | ||||||
|  |             content, _ = res | ||||||
|  |             return content | ||||||
|  |  | ||||||
|     def _download_xml(self, url_or_request, video_id, |     def _download_xml(self, url_or_request, video_id, | ||||||
|                       note=u'Downloading XML', errnote=u'Unable to download XML'): |                       note=u'Downloading XML', errnote=u'Unable to download XML'): | ||||||
|   | |||||||
| @@ -42,19 +42,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | |||||||
|     # If True it will raise an error if no login info is provided |     # If True it will raise an error if no login info is provided | ||||||
|     _LOGIN_REQUIRED = False |     _LOGIN_REQUIRED = False | ||||||
|  |  | ||||||
|     def report_lang(self): |  | ||||||
|         """Report attempt to set language.""" |  | ||||||
|         self.to_screen(u'Setting language') |  | ||||||
|  |  | ||||||
|     def _set_language(self): |     def _set_language(self): | ||||||
|         request = compat_urllib_request.Request(self._LANG_URL) |         return bool(self._download_webpage( | ||||||
|         try: |             self._LANG_URL, None, | ||||||
|             self.report_lang() |             note=u'Setting language', errnote='unable to set language', | ||||||
|             self._download_webpage(self._LANG_URL, None, False) |             fatal=False)) | ||||||
|         except ExtractorError as err: |  | ||||||
|             self._downloader.report_warning(u'unable to set language: %s' % compat_str(err.cause)) |  | ||||||
|             return False |  | ||||||
|         return True |  | ||||||
|  |  | ||||||
|     def _login(self): |     def _login(self): | ||||||
|         (username, password) = self._get_login_info() |         (username, password) = self._get_login_info() | ||||||
| @@ -64,8 +56,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | |||||||
|                 raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True) |                 raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True) | ||||||
|             return False |             return False | ||||||
|  |  | ||||||
|         login_page = self._download_webpage(self._LOGIN_URL, None, False, |         login_page = self._download_webpage( | ||||||
|             u'Unable to fetch login page') |             self._LOGIN_URL, None, | ||||||
|  |             note=u'Downloading login page', | ||||||
|  |             errnote=u'unable to fetch login page', fatal=False) | ||||||
|  |         if login_page is False: | ||||||
|  |             return | ||||||
|  |  | ||||||
|         galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"', |         galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"', | ||||||
|                                   login_page, u'Login GALX parameter') |                                   login_page, u'Login GALX parameter') | ||||||
| @@ -95,26 +91,28 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | |||||||
|         # chokes on unicode |         # chokes on unicode | ||||||
|         login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items()) |         login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items()) | ||||||
|         login_data = compat_urllib_parse.urlencode(login_form).encode('ascii') |         login_data = compat_urllib_parse.urlencode(login_form).encode('ascii') | ||||||
|         request = compat_urllib_request.Request(self._LOGIN_URL, login_data) |  | ||||||
|         try: |         req = compat_urllib_request.Request(self._LOGIN_URL, login_data) | ||||||
|             self.report_login() |         login_results = self._download_webpage( | ||||||
|             login_results = self._download_webpage(request, None, False) |             req, None, | ||||||
|             if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None: |             note=u'Logging in', errnote=u'unable to log in', fatal=False) | ||||||
|                 self._downloader.report_warning(u'unable to log in: bad username or password') |         if login_results is False: | ||||||
|                 return False |             return False | ||||||
|         except ExtractorError as err: |         if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None: | ||||||
|             self._downloader.report_warning(u'unable to log in: %s' % compat_str(err.cause)) |             self._downloader.report_warning(u'unable to log in: bad username or password') | ||||||
|             return False |             return False | ||||||
|         return True |         return True | ||||||
|  |  | ||||||
|     def _confirm_age(self): |     def _confirm_age(self): | ||||||
|         age_form = { |         age_form = { | ||||||
|                 'next_url':     '/', |             'next_url': '/', | ||||||
|                 'action_confirm':   'Confirm', |             'action_confirm': 'Confirm', | ||||||
|                 } |         } | ||||||
|         request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form)) |         req = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form)) | ||||||
|         self.report_age_confirmation() |  | ||||||
|         self._download_webpage(request, None, False, u'Unable to confirm age') |         self._download_webpage( | ||||||
|  |             req, None, | ||||||
|  |             note=u'Confirming age', errnote=u'Unable to confirm age') | ||||||
|         return True |         return True | ||||||
|  |  | ||||||
|     def _real_initialize(self): |     def _real_initialize(self): | ||||||
| @@ -1736,11 +1734,14 @@ class YoutubeSearchIE(SearchInfoExtractor): | |||||||
|  |  | ||||||
|         while (50 * pagenum) < limit: |         while (50 * pagenum) < limit: | ||||||
|             result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1) |             result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1) | ||||||
|             data = self._download_webpage(result_url, u'query "%s"' % query, |             data_json = self._download_webpage( | ||||||
|                 u'Downloading page %s' % pagenum, u'Unable to download API page') |                 result_url, video_id=u'query "%s"' % query, | ||||||
|             api_response = json.loads(data)['data'] |                 note=u'Downloading page %s' % (pagenum + 1), | ||||||
|  |                 errnote=u'Unable to download API page') | ||||||
|  |             data = json.loads(data_json) | ||||||
|  |             api_response = data['data'] | ||||||
|  |  | ||||||
|             if not 'items' in api_response: |             if 'items' not in api_response: | ||||||
|                 raise ExtractorError(u'[youtube] No video results') |                 raise ExtractorError(u'[youtube] No video results') | ||||||
|  |  | ||||||
|             new_ids = list(video['id'] for video in api_response['items']) |             new_ids = list(video['id'] for video in api_response['items']) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister