mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-11-04 08:35:12 +00:00 
			
		
		
		
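	Add a fatal parameter (default True) to _request_webpage, _download_webpage_handle and _download_webpage.
When fatal=False is passed, a download failure is reported as a warning and the function returns False instead of raising ExtractorError.
This allows us to simplify the calls in the youtube extractor even further.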
This commit is contained in:
		@@ -154,27 +154,38 @@ class InfoExtractor(object):
 | 
				
			|||||||
    def IE_NAME(self):
 | 
					    def IE_NAME(self):
 | 
				
			||||||
        return type(self).__name__[:-2]
 | 
					        return type(self).__name__[:-2]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _request_webpage(self, url_or_request, video_id, note=None, errnote=None):
 | 
					    def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
 | 
				
			||||||
        """ Returns the response handle """
 | 
					        """ Returns the response handle """
 | 
				
			||||||
        if note is None:
 | 
					        if note is None:
 | 
				
			||||||
            self.report_download_webpage(video_id)
 | 
					            self.report_download_webpage(video_id)
 | 
				
			||||||
        elif note is not False:
 | 
					        elif note is not False:
 | 
				
			||||||
 | 
					            if video_id is None:
 | 
				
			||||||
 | 
					                self.to_screen(u'%s' % (note,))
 | 
				
			||||||
 | 
					            else:
 | 
				
			||||||
                self.to_screen(u'%s: %s' % (video_id, note))
 | 
					                self.to_screen(u'%s: %s' % (video_id, note))
 | 
				
			||||||
        try:
 | 
					        try:
 | 
				
			||||||
            return self._downloader.urlopen(url_or_request)
 | 
					            return self._downloader.urlopen(url_or_request)
 | 
				
			||||||
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 | 
					        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 | 
				
			||||||
            if errnote is None:
 | 
					            if errnote is None:
 | 
				
			||||||
                errnote = u'Unable to download webpage'
 | 
					                errnote = u'Unable to download webpage'
 | 
				
			||||||
            raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2], cause=err)
 | 
					            errmsg = u'%s: %s' % (errnote, compat_str(err))
 | 
				
			||||||
 | 
					            if fatal:
 | 
				
			||||||
 | 
					                raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
 | 
				
			||||||
 | 
					            else:
 | 
				
			||||||
 | 
					                self._downloader.report_warning(errmsg)
 | 
				
			||||||
 | 
					                return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None):
 | 
					    def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
 | 
				
			||||||
        """ Returns a tuple (page content as string, URL handle) """
 | 
					        """ Returns a tuple (page content as string, URL handle) """
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Strip hashes from the URL (#1038)
 | 
					        # Strip hashes from the URL (#1038)
 | 
				
			||||||
        if isinstance(url_or_request, (compat_str, str)):
 | 
					        if isinstance(url_or_request, (compat_str, str)):
 | 
				
			||||||
            url_or_request = url_or_request.partition('#')[0]
 | 
					            url_or_request = url_or_request.partition('#')[0]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        urlh = self._request_webpage(url_or_request, video_id, note, errnote)
 | 
					        urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal)
 | 
				
			||||||
 | 
					        if urlh is False:
 | 
				
			||||||
 | 
					            assert not fatal
 | 
				
			||||||
 | 
					            return False
 | 
				
			||||||
        content_type = urlh.headers.get('Content-Type', '')
 | 
					        content_type = urlh.headers.get('Content-Type', '')
 | 
				
			||||||
        webpage_bytes = urlh.read()
 | 
					        webpage_bytes = urlh.read()
 | 
				
			||||||
        m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
 | 
					        m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
 | 
				
			||||||
@@ -209,9 +220,14 @@ class InfoExtractor(object):
 | 
				
			|||||||
        content = webpage_bytes.decode(encoding, 'replace')
 | 
					        content = webpage_bytes.decode(encoding, 'replace')
 | 
				
			||||||
        return (content, urlh)
 | 
					        return (content, urlh)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None):
 | 
					    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
 | 
				
			||||||
        """ Returns the data of the page as a string """
 | 
					        """ Returns the data of the page as a string """
 | 
				
			||||||
        return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0]
 | 
					        res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal)
 | 
				
			||||||
 | 
					        if res is False:
 | 
				
			||||||
 | 
					            return res
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            content, _ = res
 | 
				
			||||||
 | 
					            return content
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _download_xml(self, url_or_request, video_id,
 | 
					    def _download_xml(self, url_or_request, video_id,
 | 
				
			||||||
                      note=u'Downloading XML', errnote=u'Unable to download XML'):
 | 
					                      note=u'Downloading XML', errnote=u'Unable to download XML'):
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -42,19 +42,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
 | 
				
			|||||||
    # If True it will raise an error if no login info is provided
 | 
					    # If True it will raise an error if no login info is provided
 | 
				
			||||||
    _LOGIN_REQUIRED = False
 | 
					    _LOGIN_REQUIRED = False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def report_lang(self):
 | 
					 | 
				
			||||||
        """Report attempt to set language."""
 | 
					 | 
				
			||||||
        self.to_screen(u'Setting language')
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def _set_language(self):
 | 
					    def _set_language(self):
 | 
				
			||||||
        request = compat_urllib_request.Request(self._LANG_URL)
 | 
					        return bool(self._download_webpage(
 | 
				
			||||||
        try:
 | 
					            self._LANG_URL, None,
 | 
				
			||||||
            self.report_lang()
 | 
					            note=u'Setting language', errnote='unable to set language',
 | 
				
			||||||
            self._download_webpage(self._LANG_URL, None, False)
 | 
					            fatal=False))
 | 
				
			||||||
        except ExtractorError as err:
 | 
					 | 
				
			||||||
            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err.cause))
 | 
					 | 
				
			||||||
            return False
 | 
					 | 
				
			||||||
        return True
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _login(self):
 | 
					    def _login(self):
 | 
				
			||||||
        (username, password) = self._get_login_info()
 | 
					        (username, password) = self._get_login_info()
 | 
				
			||||||
@@ -64,8 +56,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
 | 
				
			|||||||
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
 | 
					                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
 | 
				
			||||||
            return False
 | 
					            return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        login_page = self._download_webpage(self._LOGIN_URL, None, False,
 | 
					        login_page = self._download_webpage(
 | 
				
			||||||
            u'Unable to fetch login page')
 | 
					            self._LOGIN_URL, None,
 | 
				
			||||||
 | 
					            note=u'Downloading login page',
 | 
				
			||||||
 | 
					            errnote=u'unable to fetch login page', fatal=False)
 | 
				
			||||||
 | 
					        if login_page is False:
 | 
				
			||||||
 | 
					            return
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
 | 
					        galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
 | 
				
			||||||
                                  login_page, u'Login GALX parameter')
 | 
					                                  login_page, u'Login GALX parameter')
 | 
				
			||||||
@@ -95,16 +91,16 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
 | 
				
			|||||||
        # chokes on unicode
 | 
					        # chokes on unicode
 | 
				
			||||||
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
 | 
					        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
 | 
				
			||||||
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
 | 
					        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
 | 
				
			||||||
        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
 | 
					
 | 
				
			||||||
        try:
 | 
					        req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
 | 
				
			||||||
            self.report_login()
 | 
					        login_results = self._download_webpage(
 | 
				
			||||||
            login_results = self._download_webpage(request, None, False)
 | 
					            req, None,
 | 
				
			||||||
 | 
					            note=u'Logging in', errnote=u'unable to log in', fatal=False)
 | 
				
			||||||
 | 
					        if login_results is False:
 | 
				
			||||||
 | 
					            return False
 | 
				
			||||||
        if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
 | 
					        if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
 | 
				
			||||||
            self._downloader.report_warning(u'unable to log in: bad username or password')
 | 
					            self._downloader.report_warning(u'unable to log in: bad username or password')
 | 
				
			||||||
            return False
 | 
					            return False
 | 
				
			||||||
        except ExtractorError as err:
 | 
					 | 
				
			||||||
            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err.cause))
 | 
					 | 
				
			||||||
            return False
 | 
					 | 
				
			||||||
        return True
 | 
					        return True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _confirm_age(self):
 | 
					    def _confirm_age(self):
 | 
				
			||||||
@@ -112,9 +108,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
 | 
				
			|||||||
            'next_url': '/',
 | 
					            'next_url': '/',
 | 
				
			||||||
            'action_confirm': 'Confirm',
 | 
					            'action_confirm': 'Confirm',
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
 | 
					        req = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
 | 
				
			||||||
        self.report_age_confirmation()
 | 
					
 | 
				
			||||||
        self._download_webpage(request, None, False, u'Unable to confirm age')
 | 
					        self._download_webpage(
 | 
				
			||||||
 | 
					            req, None,
 | 
				
			||||||
 | 
					            note=u'Confirming age', errnote=u'Unable to confirm age')
 | 
				
			||||||
        return True
 | 
					        return True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _real_initialize(self):
 | 
					    def _real_initialize(self):
 | 
				
			||||||
@@ -1736,11 +1734,14 @@ class YoutubeSearchIE(SearchInfoExtractor):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        while (50 * pagenum) < limit:
 | 
					        while (50 * pagenum) < limit:
 | 
				
			||||||
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
 | 
					            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
 | 
				
			||||||
            data = self._download_webpage(result_url, u'query "%s"' % query,
 | 
					            data_json = self._download_webpage(
 | 
				
			||||||
                u'Downloading page %s' % pagenum, u'Unable to download API page')
 | 
					                result_url, video_id=u'query "%s"' % query,
 | 
				
			||||||
            api_response = json.loads(data)['data']
 | 
					                note=u'Downloading page %s' % (pagenum + 1),
 | 
				
			||||||
 | 
					                errnote=u'Unable to download API page')
 | 
				
			||||||
 | 
					            data = json.loads(data_json)
 | 
				
			||||||
 | 
					            api_response = data['data']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            if not 'items' in api_response:
 | 
					            if 'items' not in api_response:
 | 
				
			||||||
                raise ExtractorError(u'[youtube] No video results')
 | 
					                raise ExtractorError(u'[youtube] No video results')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            new_ids = list(video['id'] for video in api_response['items'])
 | 
					            new_ids = list(video['id'] for video in api_response['items'])
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user