#问题:想把一批大概500个代理ip存进mongo里,用for循环,但最终只能存进一个二个,请同仁们帮看下,问题出哪了。
#代理数据
import requests
from my_fake_useragent import UserAgent
from lxml import etree
def get_proxy():
#随机生成'user-agent'
headers = {'User-Agent':UserAgent().random()}
#主url
url = 'https://#@%!.#%&#@~.com/free/inha/'
#生成十个主页面
urls = [f'{url}{i}' for i in range(2,11)]
for i in urls:
#获取response
r = requests.get(url=i,headers=headers).content.decode('utf8')
#数据解译
html = etree.HTML(r)
ip = html.xpath('//div[@id="list"]//tbody/tr/td[1]/text()')
port = html.xpath('//div[@id="list"]//tbody//td[2]/text()')
#数据重构,返回值
for i,p in zip(ip,port):
proxies={'https':f'http://{i}:{p}',
'port':f'{p}',
'ip':f'{i}'
}
#存存文件
#with open('poxies.json','a')as f:
#f.write(json.dumps(proxies))
#返回数据
yield proxies
#存储mongo
import pymongo
import json
from ip_proxies import get_proxy
#连接数据库
client = pymongo.MongoClient('mongodb://127.0.0.1:27017')
#创建库
db = client['py-m-g']
#创建集合
proxy = db['proxies']
da =get_proxy() #获取一个ip和port字典。
for i in da:
try:
#创建主键
i['_id'] = i['ip']
proxy.insert_one(i)
#print('it is ok')
except Exception as ex:
print(ex)
break
运行结果:
E11000 duplicate key error collection: py-m-g.proxies index: id dup key: { _id: "202.55.5.209" }, full error: {'index': 0, 'code': 11000, 'keyPattern': {'_id': 1}, 'keyValue': {'_id': '202.55.5.209'}, 'errmsg': 'E11000 duplicate key error collection: py-m-g.proxies index: id dup key: { _id: "202.55.5.209" }'}
但数据库中也确实存了二三个数据,若存成json文件,一点问题都没有。