mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-30 22:25:19 +00:00 
			
		
		
		
	Improve geo bypass mechanism
* Rename options so that their prefix matches --geo-verification-proxy
* Introduce _GEO_COUNTRIES for extractors
* Implement faking IP right away for sites with known geo restriction
This commit is contained in:
		| @@ -323,10 +323,15 @@ class InfoExtractor(object): | ||||
|     _real_extract() methods and define a _VALID_URL regexp. | ||||
|     Probably, they should also be added to the list of extractors. | ||||
|  | ||||
|     _BYPASS_GEO attribute may be set to False in order to disable | ||||
|     _GEO_BYPASS attribute may be set to False in order to disable | ||||
|     geo restriction bypass mechanisms for a particular extractor. | ||||
|     Though it won't disable explicit geo restriction bypass based on | ||||
|     country code provided with geo_bypass_country. | ||||
|     country code provided with geo_bypass_country. (experimental) | ||||
|  | ||||
|     _GEO_COUNTRIES attribute may contain a list of presumably geo unrestricted | ||||
|     countries for this extractor. One of these countries will be used by | ||||
|     geo restriction bypass mechanism right away in order to bypass | ||||
|     geo restriction, of course, if the mechanism is not disabled. (experimental) | ||||
|  | ||||
|     Finally, the _WORKING attribute should be set to False for broken IEs | ||||
|     in order to warn the users and skip the tests. | ||||
| @@ -335,7 +340,8 @@ class InfoExtractor(object): | ||||
|     _ready = False | ||||
|     _downloader = None | ||||
|     _x_forwarded_for_ip = None | ||||
|     _BYPASS_GEO = True | ||||
|     _GEO_BYPASS = True | ||||
|     _GEO_COUNTRIES = None | ||||
|     _WORKING = True | ||||
|  | ||||
|     def __init__(self, downloader=None): | ||||
| @@ -370,14 +376,28 @@ class InfoExtractor(object): | ||||
|  | ||||
|     def initialize(self): | ||||
|         """Initializes an instance (authentication, etc).""" | ||||
|         if not self._x_forwarded_for_ip: | ||||
|             country_code = self._downloader.params.get('geo_bypass_country', None) | ||||
|             if country_code: | ||||
|                 self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code) | ||||
|         self.__initialize_geo_bypass() | ||||
|         if not self._ready: | ||||
|             self._real_initialize() | ||||
|             self._ready = True | ||||
|  | ||||
|     def __initialize_geo_bypass(self): | ||||
|         if not self._x_forwarded_for_ip: | ||||
|             country_code = self._downloader.params.get('geo_bypass_country', None) | ||||
|             # If there is no explicit country for geo bypass specified and | ||||
|             # the extractor is known to be geo restricted let's fake IP | ||||
|             # as X-Forwarded-For right away. | ||||
|             if (not country_code and | ||||
|                     self._GEO_BYPASS and | ||||
|                     self._downloader.params.get('geo_bypass', True) and | ||||
|                     self._GEO_COUNTRIES): | ||||
|                 country_code = random.choice(self._GEO_COUNTRIES) | ||||
|             if country_code: | ||||
|                 self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code) | ||||
|                 if self._downloader.params.get('verbose', False): | ||||
|                     self._downloader.to_stdout( | ||||
|                         '[debug] Using fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip) | ||||
|  | ||||
|     def extract(self, url): | ||||
|         """Extracts URL information and returns it in list of dicts.""" | ||||
|         try: | ||||
| @@ -389,16 +409,8 @@ class InfoExtractor(object): | ||||
|                         ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip | ||||
|                     return ie_result | ||||
|                 except GeoRestrictedError as e: | ||||
|                     if (not self._downloader.params.get('geo_bypass_country', None) and | ||||
|                             self._BYPASS_GEO and | ||||
|                             self._downloader.params.get('geo_bypass', True) and | ||||
|                             not self._x_forwarded_for_ip and | ||||
|                             e.countries): | ||||
|                         self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(e.countries)) | ||||
|                         if self._x_forwarded_for_ip: | ||||
|                             self.report_warning( | ||||
|                                 'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip) | ||||
|                             continue | ||||
|                     if self.__maybe_fake_ip_and_retry(e.countries): | ||||
|                         continue | ||||
|                     raise | ||||
|         except ExtractorError: | ||||
|             raise | ||||
| @@ -407,6 +419,19 @@ class InfoExtractor(object): | ||||
|         except (KeyError, StopIteration) as e: | ||||
|             raise ExtractorError('An extractor error has occurred.', cause=e) | ||||
|  | ||||
|     def __maybe_fake_ip_and_retry(self, countries): | ||||
|         if (not self._downloader.params.get('geo_bypass_country', None) and | ||||
|                 self._GEO_BYPASS and | ||||
|                 self._downloader.params.get('geo_bypass', True) and | ||||
|                 not self._x_forwarded_for_ip and | ||||
|                 countries): | ||||
|             self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(countries)) | ||||
|             if self._x_forwarded_for_ip: | ||||
|                 self.report_warning( | ||||
|                     'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip) | ||||
|                 return True | ||||
|         return False | ||||
|  | ||||
|     def set_downloader(self, downloader): | ||||
|         """Sets the downloader for this IE.""" | ||||
|         self._downloader = downloader | ||||
|   | ||||
| @@ -20,6 +20,7 @@ from ..utils import ( | ||||
| class DramaFeverBaseIE(AMPIE): | ||||
|     _LOGIN_URL = 'https://www.dramafever.com/accounts/login/' | ||||
|     _NETRC_MACHINE = 'dramafever' | ||||
|     _GEO_COUNTRIES = ['US', 'CA'] | ||||
|  | ||||
|     _CONSUMER_SECRET = 'DA59dtVXYLxajktV' | ||||
|  | ||||
| @@ -118,7 +119,7 @@ class DramaFeverIE(DramaFeverBaseIE): | ||||
|             if isinstance(e.cause, compat_HTTPError): | ||||
|                 self.raise_geo_restricted( | ||||
|                     msg='Currently unavailable in your country', | ||||
|                     countries=['US', 'CA']) | ||||
|                     countries=self._GEO_COUNTRIES) | ||||
|             raise | ||||
|  | ||||
|         series_id, episode_number = video_id.split('.') | ||||
|   | ||||
| @@ -37,6 +37,7 @@ class GoIE(AdobePassIE): | ||||
|         } | ||||
|     } | ||||
|     _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys()) | ||||
|     _GEO_COUNTRIES = ['US'] | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx', | ||||
|         'info_dict': { | ||||
| @@ -104,7 +105,7 @@ class GoIE(AdobePassIE): | ||||
|                         for error in errors: | ||||
|                             if error.get('code') == 1002: | ||||
|                                 self.raise_geo_restricted( | ||||
|                                     error['message'], countries=['US']) | ||||
|                                     error['message'], countries=self._GEO_COUNTRIES) | ||||
|                         error_message = ', '.join([error['message'] for error in errors]) | ||||
|                         raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True) | ||||
|                     asset_url += '?' + entitlement['uplynkData']['sessionKey'] | ||||
|   | ||||
| @@ -24,6 +24,7 @@ from ..utils import ( | ||||
|  | ||||
| class ITVIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)' | ||||
|     _GEO_COUNTRIES = ['GB'] | ||||
|     _TEST = { | ||||
|         'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053', | ||||
|         'info_dict': { | ||||
| @@ -101,7 +102,8 @@ class ITVIE(InfoExtractor): | ||||
|             fault_code = xpath_text(resp_env, './/faultcode') | ||||
|             fault_string = xpath_text(resp_env, './/faultstring') | ||||
|             if fault_code == 'InvalidGeoRegion': | ||||
|                 self.raise_geo_restricted(msg=fault_string, countries=['GB']) | ||||
|                 self.raise_geo_restricted( | ||||
|                     msg=fault_string, countries=self._GEO_COUNTRIES) | ||||
|             raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string)) | ||||
|         title = xpath_text(playlist, 'EpisodeTitle', fatal=True) | ||||
|         video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True) | ||||
|   | ||||
| @@ -14,6 +14,7 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class NRKBaseIE(InfoExtractor): | ||||
|     _GEO_COUNTRIES = ['NO'] | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
| @@ -93,7 +94,8 @@ class NRKBaseIE(InfoExtractor): | ||||
|             # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked* | ||||
|             if 'IsGeoBlocked' in message_type: | ||||
|                 self.raise_geo_restricted( | ||||
|                     msg=MESSAGES.get('ProgramIsGeoBlocked'), countries=['NO']) | ||||
|                     msg=MESSAGES.get('ProgramIsGeoBlocked'), | ||||
|                     countries=self._GEO_COUNTRIES) | ||||
|             raise ExtractorError( | ||||
|                 '%s said: %s' % (self.IE_NAME, MESSAGES.get( | ||||
|                     message_type, message_type)), | ||||
|   | ||||
| @@ -10,6 +10,7 @@ from ..utils import ( | ||||
|  | ||||
| class OnDemandKoreaIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html' | ||||
|     _GEO_COUNTRIES = ['US', 'CA'] | ||||
|     _TEST = { | ||||
|         'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html', | ||||
|         'info_dict': { | ||||
| @@ -36,7 +37,7 @@ class OnDemandKoreaIE(InfoExtractor): | ||||
|         if 'msg_block_01.png' in webpage: | ||||
|             self.raise_geo_restricted( | ||||
|                 msg='This content is not available in your region', | ||||
|                 countries=['US', 'CA']) | ||||
|                 countries=self._GEO_COUNTRIES) | ||||
|  | ||||
|         if 'This video is only available to ODK PLUS members.' in webpage: | ||||
|             raise ExtractorError( | ||||
|   | ||||
| @@ -193,6 +193,8 @@ class PBSIE(InfoExtractor): | ||||
|         ) | ||||
|     ''' % '|'.join(list(zip(*_STATIONS))[0]) | ||||
|  | ||||
|     _GEO_COUNTRIES = ['US'] | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/', | ||||
| @@ -492,7 +494,8 @@ class PBSIE(InfoExtractor): | ||||
|                 message = self._ERRORS.get( | ||||
|                     redirect_info['http_code'], redirect_info['message']) | ||||
|                 if redirect_info['http_code'] == 403: | ||||
|                     self.raise_geo_restricted(msg=message, countries=['US']) | ||||
|                     self.raise_geo_restricted( | ||||
|                         msg=message, countries=self._GEO_COUNTRIES) | ||||
|                 raise ExtractorError( | ||||
|                     '%s said: %s' % (self.IE_NAME, message), expected=True) | ||||
|  | ||||
|   | ||||
| @@ -14,7 +14,8 @@ from ..utils import ( | ||||
|  | ||||
| class SRGSSRIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:https?://tp\.srgssr\.ch/p(?:/[^/]+)+\?urn=urn|srgssr):(?P<bu>srf|rts|rsi|rtr|swi):(?:[^:]+:)?(?P<type>video|audio):(?P<id>[0-9a-f\-]{36}|\d+)' | ||||
|     _BYPASS_GEO = False | ||||
|     _GEO_BYPASS = False | ||||
|     _GEO_COUNTRIES = ['CH'] | ||||
|  | ||||
|     _ERRORS = { | ||||
|         'AGERATING12': 'To protect children under the age of 12, this video is only available between 8 p.m. and 6 a.m.', | ||||
| @@ -43,7 +44,8 @@ class SRGSSRIE(InfoExtractor): | ||||
|         if media_data.get('block') and media_data['block'] in self._ERRORS: | ||||
|             message = self._ERRORS[media_data['block']] | ||||
|             if media_data['block'] == 'GEOBLOCK': | ||||
|                 self.raise_geo_restricted(msg=message, countries=['CH']) | ||||
|                 self.raise_geo_restricted( | ||||
|                     msg=message, countries=self._GEO_COUNTRIES) | ||||
|             raise ExtractorError( | ||||
|                 '%s said: %s' % (self.IE_NAME, message), expected=True) | ||||
|  | ||||
|   | ||||
| @@ -13,6 +13,7 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class SVTBaseIE(InfoExtractor): | ||||
|     _GEO_COUNTRIES = ['SE'] | ||||
|     def _extract_video(self, video_info, video_id): | ||||
|         formats = [] | ||||
|         for vr in video_info['videoReferences']: | ||||
| @@ -39,7 +40,8 @@ class SVTBaseIE(InfoExtractor): | ||||
|                 }) | ||||
|         if not formats and video_info.get('rights', {}).get('geoBlockedSweden'): | ||||
|             self.raise_geo_restricted( | ||||
|                 'This video is only available in Sweden', countries=['SE']) | ||||
|                 'This video is only available in Sweden', | ||||
|                 countries=self._GEO_COUNTRIES) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         subtitles = {} | ||||
|   | ||||
| @@ -20,6 +20,7 @@ class Vbox7IE(InfoExtractor): | ||||
|                         ) | ||||
|                         (?P<id>[\da-fA-F]+) | ||||
|                     ''' | ||||
|     _GEO_COUNTRIES = ['BG'] | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://vbox7.com/play:0946fff23c', | ||||
|         'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf', | ||||
| @@ -78,7 +79,7 @@ class Vbox7IE(InfoExtractor): | ||||
|         video_url = video['src'] | ||||
|  | ||||
|         if '/na.mp4' in video_url: | ||||
|             self.raise_geo_restricted(countries=['BG']) | ||||
|             self.raise_geo_restricted(countries=self._GEO_COUNTRIES) | ||||
|  | ||||
|         uploader = video.get('uploader') | ||||
|  | ||||
|   | ||||
| @@ -14,7 +14,7 @@ from ..utils import ( | ||||
|  | ||||
| class VGTVIE(XstreamIE): | ||||
|     IE_DESC = 'VGTV, BTTV, FTV, Aftenposten and Aftonbladet' | ||||
|     _BYPASS_GEO = False | ||||
|     _GEO_BYPASS = False | ||||
|  | ||||
|     _HOST_TO_APPNAME = { | ||||
|         'vgtv.no': 'vgtv', | ||||
| @@ -218,7 +218,8 @@ class VGTVIE(XstreamIE): | ||||
|             properties = try_get( | ||||
|                 data, lambda x: x['streamConfiguration']['properties'], list) | ||||
|             if properties and 'geoblocked' in properties: | ||||
|                 raise self.raise_geo_restricted(countries=['NO']) | ||||
|                 raise self.raise_geo_restricted( | ||||
|                     countries=[host.rpartition('.')[-1].partition('/')[0].upper()]) | ||||
|  | ||||
|         self._sort_formats(info['formats']) | ||||
|  | ||||
|   | ||||
| @@ -27,7 +27,7 @@ class VikiBaseIE(InfoExtractor): | ||||
|     _APP_VERSION = '2.2.5.1428709186' | ||||
|     _APP_SECRET = '-$iJ}@p7!G@SyU/je1bEyWg}upLu-6V6-Lg9VD(]siH,r.,m-r|ulZ,U4LC/SeR)' | ||||
|  | ||||
|     _BYPASS_GEO = False | ||||
|     _GEO_BYPASS = False | ||||
|     _NETRC_MACHINE = 'viki' | ||||
|  | ||||
|     _token = None | ||||
|   | ||||
| @@ -3291,7 +3291,7 @@ class GeoUtils(object): | ||||
|         addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0] | ||||
|         addr_max = addr_min | (0xffffffff >> int(preflen)) | ||||
|         return compat_str(socket.inet_ntoa( | ||||
|             compat_struct_pack('!I', random.randint(addr_min, addr_max)))) | ||||
|             compat_struct_pack('!L', random.randint(addr_min, addr_max)))) | ||||
|  | ||||
|  | ||||
| class PerRequestProxyHandler(compat_urllib_request.ProxyHandler): | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Sergey M․
					Sergey M․