大体情况是这样:我用 aiohttp 写了一个异步爬虫,但是使用代理时,几个代理平台都显示我使用的代理数量是实际的两倍。实测单线程和多线程的写法都没有这个问题。请问是哪里重复了?
直接上代码吧。
```python
import requests
from lxml import etree
# Helper function: returns a list whose items are proxies.
def proxy_pool():
    """Fetch proxies from the provider API and return them as proxy URLs.

    Prompts on stdin for the number of IPs to request, downloads the
    plain-text list (the query string asks for ``format=txt``) and prefixes
    each entry with ``http://``.

    Returns:
        list: One ``'http://' + entry`` string per non-blank line of the
        response (presumably ``host:port`` entries -- confirm against the
        provider's API docs). Empty when the response has no entries.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    q = input('输入想从代理池中取出多少ip?\n')
    url = f'http://t.ipjldl.com/index.php/api/entry?method=proxyServer.generate_api_url&packid=1&fa=0&fetch_key=&groupid=0&qty={q}&time=1&pro=&city=&port=1&format=txt&ss=1&css=&dt=1&specialTxt=3&specialJson=&usertype=14'
    headers = {
        # The two literals are concatenated; the trailing space keeps
        # "like Gecko)" and "Chrome/..." from running together.
        'User-Agent': ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                       'Chrome/91.0.4472.101 Safari/537.36')
    }
    # A timeout keeps a stalled provider from hanging the script forever.
    page_text = requests.get(url=url, headers=headers, timeout=10).text

    http_proxy = []  # proxy pool
    # The body is plain text, one entry per line, so it is split directly
    # instead of going through an HTML parser. splitlines() copes with both
    # "\r\n" and "\n" endings.
    for line in page_text.splitlines():
        entry = line.strip()
        if not entry:
            # A blank line (e.g. after a trailing newline) would otherwise
            # become the useless proxy "http://".
            continue
        # aiohttp takes the proxy as a plain URL string (requests would
        # instead want a {'http': ...} mapping).
        dic = 'http://' + entry
        http_proxy.append(dic)
        print(f'提取的代理 : {dic}')
    return http_proxy
# Quick test: extracting a single proxy here makes the platform report that two were used!
import aiohttp
import asyncio
from proxy import proxy_pool
import random
async def crawl(session, url, proxy_auth):
    """GET ``url`` through a randomly chosen proxy and print the outcome.

    The proxy is drawn from the module-level ``proxy_list``. On success the
    response body is printed followed by ``success``; any exception is
    printed followed by ``error`` and is not re-raised.

    Args:
        session: Object whose ``get(...)`` result is used as an async
            context manager (an aiohttp client session in this script).
        url: Address to request.
        proxy_auth: Credentials forwarded as ``proxy_auth`` to ``get``.
    """
    try:
        chosen_proxy = random.choice(proxy_list)
        request = session.get(url=url, proxy=chosen_proxy, proxy_auth=proxy_auth)
        async with request as reply:
            body = await reply.text()
            print(body)
            print('success')
    except Exception as exc:
        # Best-effort crawl: report the failure instead of propagating it.
        print(exc)
        print('error')
async def main():
    """Open one client session and run a single ``crawl`` task to completion.

    The connector is created with ``ssl=False`` and the same proxy
    credentials object is handed to the crawl task.
    """
    connector = aiohttp.TCPConnector(ssl=False)
    async with aiohttp.ClientSession(connector=connector) as session:
        url = 'https://httpbin.org/ip'
        proxy_auth = aiohttp.BasicAuth('xxxxx', 'xxxxx')
        job = asyncio.create_task(crawl(session, url, proxy_auth=proxy_auth))
        # Only one task is scheduled; wait() blocks until it has finished.
        await asyncio.wait([job])
if __name__ == '__main__':
    # crawl() reads the module-level name proxy_list, so it must be bound
    # before the event loop starts.
    proxy_list = proxy_pool()
    # The selector policy class is only defined on Windows; referencing it
    # unconditionally raises AttributeError on other platforms.
    if hasattr(asyncio, 'WindowsSelectorEventLoopPolicy'):
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
    # asyncio.run() creates a fresh loop, runs main() and closes the loop;
    # calling get_event_loop() with no running loop is deprecated.
    asyncio.run(main())
```