原本想整活弄点福利但是老是被覆盖
import random
import os #创建文件
from selenium import webdriver
import requests
from lxml import etree
from selenium.webdriver import ChromeOptions #这个包用来规避被检测的风险
import time
from urllib import request
import pandas as pd
driver_path=r'C:\Users\哥斯拉\AppData\Local\Google\Chrome\Application\chromedriver.exe' #定义好路径
#这两段代码规避检测
option = ChromeOptions()
option.add_experimental_option('excludeSwitches',['enable-automation'])
driver=webdriver.Chrome(executable_path=driver_path,options=option) #初始化路径+规避检测
driver.get('https://www.enterdesk.com/search/1-0-7-0-0-0/%E9%BB%91%E4%B8%9D')
js='document.documentElement.scrollTop=10000'
driver.execute_script(js)
time.sleep(2)
driver.execute_script(js)
# driver.execute_script("window.scrollTo(0,document.body.scrollHeight)") #滚动到底部,但是如果是AJX的交互的没法滚到底部只能滚一点
p=0
def simulation_operation():
souce=driver.page_source
listpages=driver.find_element_by_xpath('//div[@class="listpages"]') #最底部
get_img(souce)
time.sleep(round(random.uniform(1, 4), 2))
while True:
driver.execute_script(js)
if "listpages" in listpages.get_attribute("class"):
break
else:
driver.execute_script(js)
#循环拉动滚轮到页面底部
def get_img(url):
html=etree.HTML(url)
imgs=html.xpath('//div[@class="egeli_pic_li"]//img/@src')
titles=html.xpath('//div[@class="egeli_pic_li"]//img/@title')
for title in titles:
kk=title
for img in imgs:
page=requests.get(img).content
# tt=page.content
with open('{}.jpg'.format(kk), mode='wb') as f:
f.write(page)
if __name__ == '__main__':
simulation_operation()
这是全部的源代码,我也查过资料说可能是文件名重复了,可我用下面代码已经获取了文件名也赋值了啊
imgs=html.xpath('//div[@class="egeli_pic_li"]//img/@src') #获取图片的url
titles=html.xpath('//div[@class="egeli_pic_li"]//img/@title') #获取到图片的名称
for title in titles:
kk=title
for img in imgs:
page=requests.get(img).content
# tt=page.content
with open('{}.jpg'.format(kk), mode='wb') as f:
f.write(page)
麻烦帮忙看看是哪里1有问题,可以的话帮忙完善一下代码,主要就卡在下载图片这里了