Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2025-10-30 22:25:19 +00:00)
			
		
		
		
	Improve geo bypass mechanism
* Introduce geo bypass context
* Add ability to bypass based on IP blocks in CIDR notation
* Introduce --geo-bypass-ip-block
This commit is contained in:
		| @@ -286,6 +286,9 @@ class YoutubeDL(object): | ||||
|                        Two-letter ISO 3166-2 country code that will be used for | ||||
|                        explicit geographic restriction bypassing via faking | ||||
|                        X-Forwarded-For HTTP header (experimental) | ||||
|     geo_bypass_ip_block: | ||||
|                        IP range in CIDR notation that will be used similarly to | ||||
|                        geo_bypass_country (experimental) | ||||
|  | ||||
|     The following options determine which downloader is picked: | ||||
|     external_downloader: Executable of the external downloader to call. | ||||
|   | ||||
| @@ -430,6 +430,7 @@ def _real_main(argv=None): | ||||
|         'config_location': opts.config_location, | ||||
|         'geo_bypass': opts.geo_bypass, | ||||
|         'geo_bypass_country': opts.geo_bypass_country, | ||||
|         'geo_bypass_ip_block': opts.geo_bypass_ip_block, | ||||
|         # just for deprecation check | ||||
|         'autonumber': opts.autonumber if opts.autonumber is True else None, | ||||
|         'usetitle': opts.usetitle if opts.usetitle is True else None, | ||||
|   | ||||
| @@ -277,7 +277,9 @@ class AnvatoIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         url, smuggled_data = unsmuggle_url(url, {}) | ||||
|         self._initialize_geo_bypass(smuggled_data.get('geo_countries')) | ||||
|         self._initialize_geo_bypass({ | ||||
|             'countries': smuggled_data.get('geo_countries'), | ||||
|         }) | ||||
|  | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         access_key, video_id = mobj.group('access_key_or_mcp', 'id') | ||||
|   | ||||
| @@ -669,7 +669,10 @@ class BrightcoveNewIE(AdobePassIE): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         url, smuggled_data = unsmuggle_url(url, {}) | ||||
|         self._initialize_geo_bypass(smuggled_data.get('geo_countries')) | ||||
|         self._initialize_geo_bypass({ | ||||
|             'countries': smuggled_data.get('geo_countries'), | ||||
|             'ip_blocks': smuggled_data.get('geo_ip_blocks'), | ||||
|         }) | ||||
|  | ||||
|         account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups() | ||||
|  | ||||
|   | ||||
| @@ -346,6 +346,11 @@ class InfoExtractor(object): | ||||
|     geo restriction bypass mechanism right away in order to bypass | ||||
|     geo restriction, of course, if the mechanism is not disabled. (experimental) | ||||
|  | ||||
|     _GEO_IP_BLOCKS attribute may contain a list of presumably geo unrestricted | ||||
|     IP blocks in CIDR notation for this extractor. One of these IP blocks | ||||
|     will be used by geo restriction bypass mechanism similarly | ||||
|     to _GEO_COUNTRIES. (experimental) | ||||
|  | ||||
|     NB: both these geo attributes are experimental and may change in future | ||||
|     or be completely removed. | ||||
|  | ||||
| @@ -358,6 +363,7 @@ class InfoExtractor(object): | ||||
|     _x_forwarded_for_ip = None | ||||
|     _GEO_BYPASS = True | ||||
|     _GEO_COUNTRIES = None | ||||
|     _GEO_IP_BLOCKS = None | ||||
|     _WORKING = True | ||||
|  | ||||
|     def __init__(self, downloader=None): | ||||
| @@ -392,12 +398,15 @@ class InfoExtractor(object): | ||||
|  | ||||
|     def initialize(self): | ||||
|         """Initializes an instance (authentication, etc).""" | ||||
|         self._initialize_geo_bypass(self._GEO_COUNTRIES) | ||||
|         self._initialize_geo_bypass({ | ||||
|             'countries': self._GEO_COUNTRIES, | ||||
|             'ip_blocks': self._GEO_IP_BLOCKS, | ||||
|         }) | ||||
|         if not self._ready: | ||||
|             self._real_initialize() | ||||
|             self._ready = True | ||||
|  | ||||
|     def _initialize_geo_bypass(self, countries): | ||||
|     def _initialize_geo_bypass(self, geo_bypass_context): | ||||
|         """ | ||||
|         Initialize geo restriction bypass mechanism. | ||||
|  | ||||
| @@ -408,28 +417,82 @@ class InfoExtractor(object): | ||||
|         HTTP requests. | ||||
|  | ||||
|         This method will be used for initial geo bypass mechanism initialization | ||||
|         during the instance initialization with _GEO_COUNTRIES. | ||||
|         during the instance initialization with _GEO_COUNTRIES and | ||||
|         _GEO_IP_BLOCKS. | ||||
|  | ||||
|         You may also manually call it from extractor's code if geo countries | ||||
|         You may also manually call it from extractor's code if geo bypass | ||||
|         information is not available beforehand (e.g. obtained during | ||||
|         extraction) or due to some another reason. | ||||
|         extraction) or due to some other reason. In this case you should pass | ||||
|         this information in geo bypass context passed as first argument. It may | ||||
|         contain following fields: | ||||
|  | ||||
|         countries:  List of geo unrestricted countries (similar | ||||
|                     to _GEO_COUNTRIES) | ||||
|         ip_blocks:  List of geo unrestricted IP blocks in CIDR notation | ||||
|                     (similar to _GEO_IP_BLOCKS) | ||||
|  | ||||
|         """ | ||||
|         if not self._x_forwarded_for_ip: | ||||
|             country_code = self._downloader.params.get('geo_bypass_country', None) | ||||
|             # If there is no explicit country for geo bypass specified and | ||||
|             # the extractor is known to be geo restricted let's fake IP | ||||
|             # as X-Forwarded-For right away. | ||||
|             if (not country_code and | ||||
|                     self._GEO_BYPASS and | ||||
|                     self._downloader.params.get('geo_bypass', True) and | ||||
|                     countries): | ||||
|                 country_code = random.choice(countries) | ||||
|             if country_code: | ||||
|                 self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code) | ||||
|  | ||||
|             # Geo bypass mechanism is explicitly disabled by user | ||||
|             if not self._downloader.params.get('geo_bypass', True): | ||||
|                 return | ||||
|  | ||||
|             if not geo_bypass_context: | ||||
|                 geo_bypass_context = {} | ||||
|  | ||||
|             # Backward compatibility: previously _initialize_geo_bypass | ||||
|             # expected a list of countries, some 3rd party code may still use | ||||
|             # it this way | ||||
|             if isinstance(geo_bypass_context, (list, tuple)): | ||||
|                 geo_bypass_context = { | ||||
|                     'countries': geo_bypass_context, | ||||
|                 } | ||||
|  | ||||
|             # The whole point of geo bypass mechanism is to fake IP | ||||
|             # as X-Forwarded-For HTTP header based on some IP block or | ||||
|             # country code. | ||||
|  | ||||
|             # Path 1: bypassing based on IP block in CIDR notation | ||||
|  | ||||
|             # Explicit IP block specified by user, use it right away | ||||
|             # regardless of whether extractor is geo bypassable or not | ||||
|             ip_block = self._downloader.params.get('geo_bypass_ip_block', None) | ||||
|  | ||||
|             # Otherwise use random IP block from geo bypass context but only | ||||
|             # if extractor is known as geo bypassable | ||||
|             if not ip_block: | ||||
|                 ip_blocks = geo_bypass_context.get('ip_blocks') | ||||
|                 if self._GEO_BYPASS and ip_blocks: | ||||
|                     ip_block = random.choice(ip_blocks) | ||||
|  | ||||
|             if ip_block: | ||||
|                 self._x_forwarded_for_ip = GeoUtils.random_ipv4(ip_block) | ||||
|                 if self._downloader.params.get('verbose', False): | ||||
|                     self._downloader.to_screen( | ||||
|                         '[debug] Using fake IP %s as X-Forwarded-For.' | ||||
|                         % self._x_forwarded_for_ip) | ||||
|                 return | ||||
|  | ||||
|             # Path 2: bypassing based on country code | ||||
|  | ||||
|             # Explicit country code specified by user, use it right away | ||||
|             # regardless of whether extractor is geo bypassable or not | ||||
|             country = self._downloader.params.get('geo_bypass_country', None) | ||||
|  | ||||
|             # Otherwise use random country code from geo bypass context but | ||||
|             # only if extractor is known as geo bypassable | ||||
|             if not country: | ||||
|                 countries = geo_bypass_context.get('countries') | ||||
|                 if self._GEO_BYPASS and countries: | ||||
|                     country = random.choice(countries) | ||||
|  | ||||
|             if country: | ||||
|                 self._x_forwarded_for_ip = GeoUtils.random_ipv4(country) | ||||
|                 if self._downloader.params.get('verbose', False): | ||||
|                     self._downloader.to_screen( | ||||
|                         '[debug] Using fake IP %s (%s) as X-Forwarded-For.' | ||||
|                         % (self._x_forwarded_for_ip, country_code.upper())) | ||||
|                         % (self._x_forwarded_for_ip, country.upper())) | ||||
|  | ||||
|     def extract(self, url): | ||||
|         """Extracts URL information and returns it in list of dicts.""" | ||||
|   | ||||
| @@ -102,7 +102,9 @@ class DPlayIE(InfoExtractor): | ||||
|         display_id = mobj.group('id') | ||||
|         domain = mobj.group('domain') | ||||
|  | ||||
|         self._initialize_geo_bypass([mobj.group('country').upper()]) | ||||
|         self._initialize_geo_bypass({ | ||||
|             'countries': [mobj.group('country').upper()], | ||||
|         }) | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|   | ||||
| @@ -123,7 +123,7 @@ class GoIE(AdobePassIE): | ||||
|                         'adobe_requestor_id': requestor_id, | ||||
|                     }) | ||||
|                 else: | ||||
|                     self._initialize_geo_bypass(['US']) | ||||
|                     self._initialize_geo_bypass({'countries': ['US']}) | ||||
|                 entitlement = self._download_json( | ||||
|                     'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json', | ||||
|                     video_id, data=urlencode_postdata(data)) | ||||
|   | ||||
| @@ -282,7 +282,9 @@ class LimelightMediaIE(LimelightBaseIE): | ||||
|     def _real_extract(self, url): | ||||
|         url, smuggled_data = unsmuggle_url(url, {}) | ||||
|         video_id = self._match_id(url) | ||||
|         self._initialize_geo_bypass(smuggled_data.get('geo_countries')) | ||||
|         self._initialize_geo_bypass({ | ||||
|             'countries': smuggled_data.get('geo_countries'), | ||||
|         }) | ||||
|  | ||||
|         pc, mobile, metadata = self._extract( | ||||
|             video_id, 'getPlaylistByMediaId', | ||||
|   | ||||
| @@ -227,14 +227,16 @@ class TVPlayIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         url, smuggled_data = unsmuggle_url(url, {}) | ||||
|         self._initialize_geo_bypass(smuggled_data.get('geo_countries')) | ||||
|         self._initialize_geo_bypass({ | ||||
|             'countries': smuggled_data.get('geo_countries'), | ||||
|         }) | ||||
|  | ||||
|         video_id = self._match_id(url) | ||||
|         geo_country = self._search_regex( | ||||
|             r'https?://[^/]+\.([a-z]{2})', url, | ||||
|             'geo country', default=None) | ||||
|         if geo_country: | ||||
|             self._initialize_geo_bypass([geo_country.upper()]) | ||||
|             self._initialize_geo_bypass({'countries': [geo_country.upper()]}) | ||||
|         video = self._download_json( | ||||
|             'http://playapi.mtgx.tv/v3/videos/%s' % video_id, video_id, 'Downloading video JSON') | ||||
|  | ||||
|   | ||||
| @@ -249,6 +249,10 @@ def parseOpts(overrideArguments=None): | ||||
|         '--geo-bypass-country', metavar='CODE', | ||||
|         dest='geo_bypass_country', default=None, | ||||
|         help='Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code (experimental)') | ||||
|     geo.add_option( | ||||
|         '--geo-bypass-ip-block', metavar='IP_BLOCK', | ||||
|         dest='geo_bypass_ip_block', default=None, | ||||
|         help='Force bypass geographic restriction with explicitly provided IP block in CIDR notation (experimental)') | ||||
|  | ||||
|     selection = optparse.OptionGroup(parser, 'Video Selection') | ||||
|     selection.add_option( | ||||
|   | ||||
| @@ -3534,10 +3534,13 @@ class GeoUtils(object): | ||||
|     } | ||||
|  | ||||
|     @classmethod | ||||
|     def random_ipv4(cls, code): | ||||
|         block = cls._country_ip_map.get(code.upper()) | ||||
|         if not block: | ||||
|             return None | ||||
|     def random_ipv4(cls, code_or_block): | ||||
|         if len(code_or_block) == 2: | ||||
|             block = cls._country_ip_map.get(code_or_block.upper()) | ||||
|             if not block: | ||||
|                 return None | ||||
|         else: | ||||
|             block = code_or_block | ||||
|         addr, preflen = block.split('/') | ||||
|         addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0] | ||||
|         addr_max = addr_min | (0xffffffff >> int(preflen)) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Sergey M․
					Sergey M․