proxy_scraper/scrape_proxies.py


import requests
import json

formatted_proxies = []

# fate0/proxylist
# Each line of proxy.list is a standalone JSON object with "type", "host",
# "port" and "anonymity" fields.
r = requests.get('https://raw.githubusercontent.com/fate0/proxylist/master/proxy.list')
for line in r.text.splitlines():
    if not line.strip():
        continue
    parsed = json.loads(line)
    # Optional anonymity filter, currently disabled:
    # if parsed['anonymity'] == 'high_anonymous':
    proxy_string = f'{parsed["type"]}://{parsed["host"]}:{parsed["port"]}'
    formatted_proxies.append(proxy_string)
# ShiftyTR/Proxy-List
https = requests.get('https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/https.txt')
http = requests.get('https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/http.txt')
for line in https.text.splitlines():
    proxy_string = f'https://{line}'
    formatted_proxies.append(proxy_string)
for line in http.text.splitlines():
    proxy_string = f'http://{line}'
    formatted_proxies.append(proxy_string)
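# Optional refactor sketch (not part of the original script, left disabled like
# the IshanSingla block below): every plain-text source in this file follows the
# same fetch-and-prefix pattern, so a small helper could replace the repeated
# loops. The name fetch_plain_list and the 10-second timeout are illustrative
# assumptions, not existing code.
# def fetch_plain_list(url, scheme):
#     r = requests.get(url, timeout=10)
#     r.raise_for_status()
#     return [f'{scheme}://{line}' for line in r.text.splitlines() if line.strip()]
#
# Example usage:
# formatted_proxies.extend(fetch_plain_list(
#     'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/https.txt', 'https'))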
# rdavydov/proxy-list
r = requests.get('https://raw.githubusercontent.com/rdavydov/proxy-list/main/proxies_anonymous/http.txt')
for line in r.text.splitlines():
    proxy_string = f'http://{line}'
    formatted_proxies.append(proxy_string)
# mmpx12/proxy-list
https = requests.get('https://raw.githubusercontent.com/mmpx12/proxy-list/master/https.txt')
http = requests.get('https://raw.githubusercontent.com/mmpx12/proxy-list/master/http.txt')
for line in https.text.splitlines():
    proxy_string = f'https://{line}'
    formatted_proxies.append(proxy_string)
for line in http.text.splitlines():
    proxy_string = f'http://{line}'
    formatted_proxies.append(proxy_string)
# IshanSingla/proxy-list
# This source is updated infrequently, so it is left disabled.
# https = requests.get('https://raw.githubusercontent.com/IshanSingla/proxy-list/main/proxys/https.txt')
# http = requests.get('https://raw.githubusercontent.com/IshanSingla/proxy-list/main/proxys/http.txt')
# for line in https.text.splitlines():
#     proxy_string = f'https://{line}'
#     formatted_proxies.append(proxy_string)
# for line in http.text.splitlines():
#     proxy_string = f'http://{line}'
#     formatted_proxies.append(proxy_string)
# monosans/proxy-list
r = requests.get('https://raw.githubusercontent.com/monosans/proxy-list/main/proxies_anonymous/http.txt')
for line in r.text.splitlines():
    proxy_string = f'http://{line}'
    formatted_proxies.append(proxy_string)
# TheSpeedX/PROXY-List
r = requests.get('https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/http.txt')
for line in r.text.splitlines():
    proxy_string = f'http://{line}'
    formatted_proxies.append(proxy_string)
# *******************
# Print the de-duplicated proxy list to stdout.
for x in list(set(formatted_proxies)):
    print(x)
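# Optional (illustrative, not in the original script): the de-duplicated list
# could be written to disk instead of stdout so other tools can consume it.
# The filename 'proxies.txt' is an assumed example, not an existing output.
# with open('proxies.txt', 'w') as f:
#     f.write('\n'.join(sorted(set(formatted_proxies))) + '\n')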