这是一个代理抓取脚本,用request库从一个代理网站上面抓取代理并进行验证。验证网站是 https://ipv4.icanhazip.com/ ,可以显示当前客户端的ip地址。
我的预期是,通过requests 的proxies参数用代理访问这个网站时,脚本显示的是代理的ip地址,但实际运行时,脚本显示的却是我本地的ip地址。
请问是什么原因呢?谢谢
```python
import requests
from lxml import etree
# 获取快代理首页的代理
def get_proxy_list(i):
url = f"https://www.kuaidaili.com/free/inha/{i}/" #https://www.freeproxylists.net/zh/?s=u https://ip.jiangxianli.com/?page=1
payload = {}
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36",
'Accept': 'application/json, text/javascript, */*; q=0.01',
}
response = requests.request("GET", url, headers=headers, data=payload)
res = []
_ = etree.HTML(response.text)
type_dct = {
"HTTP": "http://",
"HTTPS": "https://"
}
data_list = _.xpath("//tbody/tr")
for data in data_list:
ip = data.xpath("./td[1]/text()")[0]
port = data.xpath("./td[2]/text()")[0]
type = data.xpath("./td[4]/text()")[0]
res.append(type_dct[type] + ip + ':' + port)
return res
# 测试代理
def check(proxy):
href='https://ipv4.icanhazip.com'
if 'https' in proxy:
proxies = {'https': proxy}
else:
proxies = {'http': proxy}
headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_2_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4396.0 Safari/537.36'
}
try:
r = requests.get(href, proxies=proxies, timeout=10, headers=headers)
print(r.content)
if r.status_code == 200:
return True
except:
return False
if __name__ == '__main__':
for i in range(1,6):
proxy_list = get_proxy_list(i)
print(proxy_list)
for p in proxy_list:
print(p, check(p))
```