def ip_log(ip, port):
    """Start a Chrome session routed through the proxy ``ip:port``.

    The new driver is stored in the module-level global ``browser``. If a
    previous driver is already stored there it is shut down first, so that
    repeated proxy switches do not leave orphaned Chrome/chromedriver
    processes behind.

    Args:
        ip: Proxy host address.
        port: Proxy port.
    """
    global browser
    # Imported locally so this block does not depend on file-level imports
    # beyond the `webdriver` name the original code already used.
    from selenium.common.exceptions import WebDriverException

    PROXY = f"{ip}:{port}"  # proxy address in host:port form
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--proxy-server=%s' % PROXY)

    # `browser` may not exist yet on the very first call, so look it up
    # through globals() instead of reading the name directly.
    previous = globals().get('browser')
    if previous is not None:
        try:
            previous.quit()
        except WebDriverException:
            # Best-effort cleanup: the old session may already be gone.
            pass

    # NOTE(review): `executable_path` is the keyword the original code used;
    # confirm it is still accepted by the installed Selenium version.
    browser = webdriver.Chrome(executable_path='./chromedriver', options=chrome_options)
def data(value):
    """Scrape Taobao search result pages for ``value`` and collect item fields.

    Reads the page currently loaded in the global ``browser``, extracts item
    name, price, buyer count and location from it, switches to the next proxy
    via ``ip_log`` and loads the next result page. This is repeated five
    times. The collected lists and the row counter are also published as
    module-level globals.

    Args:
        value: Search keyword inserted into the ``q=`` query parameter.

    Returns:
        A tuple ``(shop_name_list, shop_price_list, shop_people_list,
        shop_location_list, a)`` where ``a`` is the number of complete rows
        (the four lists zipped together) that were printed.
    """
    global shop_name_list, shop_price_list, shop_people_list, shop_location_list, a

    # Run a JS snippet that scrolls to the bottom of the page.
    browser.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    sleep(2)

    shop_name_list = []
    shop_price_list = []
    shop_people_list = []
    shop_location_list = []
    ip_list = ['36.248.132.187', '36.248.132.23', '122.4.48.145']  # three proxy IPs
    port_list = [9999, 9999, 9999]  # port matching each proxy IP
    a = 0   # number of rows printed so far
    b = 44  # value for the `s=` query parameter of the search URL
    c = 0   # index of the next proxy to use

    for _ in range(1, 6):
        page = browser.page_source
        soup = BeautifulSoup(page, 'lxml')
        shop_data_list = soup.find('div', class_='grid g-clearfix').find_all_next('div', class_='items')
        for shop_data in shop_data_list:
            # Item name: taken from the `alt` text of the first image in each picture link.
            for pic_div in shop_data.find_all('div', class_='pic'):
                for pic_link in pic_div.find_all('a', class_='pic-link J_ClickStat J_ItemPicA'):
                    shop_name_list.append(pic_link.find_all('img')[0]['alt'])
            # Item price
            for shop_price in shop_data.find_all('div', class_='price g_price g_price-highlight'):
                shop_price_list.append(shop_price.text.strip())
            # Number of buyers
            for shop_people_number in shop_data.find_all('div', class_='deal-cnt'):
                shop_people_list.append(shop_people_number.text)
            # Location
            for shop_location in shop_data.find_all('div', class_='location'):
                shop_location_list.append(shop_location.text)

        # Switch to the next proxy exactly once per page. The original chain
        # of independent `if c == ...` tests fell through all three branches
        # on every pass, launching three browsers per page.
        ip_log(ip_list[c], port_list[c])
        c = (c + 1) % len(ip_list)

        # The lists accumulate across pages, so only print and count the rows
        # added since the previous page instead of re-counting everything.
        rows = list(zip(shop_name_list, shop_price_list, shop_people_list, shop_location_list))
        for row in rows[a:]:
            print(row)
        a = len(rows)

        # NOTE(review): `b` starts at 44 and is incremented before its first
        # use, so the first navigation requests s=88; confirm that skipping
        # s=44 is intended. The page loaded on the final pass is never parsed.
        b += 44
        browser.get(f"https://s.taobao.com/search?q={value}&s={b}")
        sleep(0.5)

    print('已成功爬取:%s条信息'%a)
    return shop_name_list, shop_price_list, shop_people_list, shop_location_list, a
python中selenium动态切换ip的问题(如何使每次切换页面只切换ip但不打开谷歌浏览器)
- 写回答
- 好问题 0 提建议
- 追加酬金
- 关注问题
- 邀请回答
-
2条回答 默认 最新
- coagenth 2021-04-22 17:15关注
题主的意思应该是每次切换代理时桌面上不显示浏览器窗口吧?浏览器本身必须启动,否则无法获取数据。如果想要隐藏浏览器界面,可以在选项中设置,在
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--proxy-server=%s' % PROXY)
两行的后面加上下面一句即可。
chrome_options.add_argument('--headless')#不显示浏览器窗口
本回答被题主选为最佳回答 , 对您是否有帮助呢?解决评论 打赏 举报无用 1
悬赏问题
- ¥15 iis10中如何阻止别人网站重定向到我的网站
- ¥15 滑块验证码移动速度不一致问题
- ¥15 定制ai直播实时换脸软件
- ¥100 栈回溯相关,模块加载后KiExceptionDispatch无法正常回溯了
- ¥15 Ubuntu中vscode下cern root工作台中写的程序root的头文件无法包含
- ¥15 麒麟V10桌面版SP1如何配置bonding
- ¥15 Marscode IDE 如何预览新建的 HTML 文件
- ¥15 K8S部署二进制集群过程中calico一直报错
- ¥15 java python或者任何一种编程语言复刻一个网页
- ¥20 如何通过代码传输视频到亚马逊平台