
def pa():
    """Scrape a Zhihu search-results page that loads content while scrolling.

    Opens Zhihu in Chrome, pauses so the operator can log in by hand, searches
    for a fixed keyword, restricts the results through the filter menu, scrolls
    the page with injected JavaScript so lazily loaded items are rendered, and
    finally parses the resulting HTML.

    Returns:
        BeautifulSoup: the parsed page source captured after scrolling.

    Raises:
        selenium.common.exceptions.NoSuchElementException: if one of the
            hard-coded XPaths no longer matches the page layout.
    """
    options = webdriver.ChromeOptions()
    options.binary_location = r"D:\Program Files\Google\Chrome\Application\chrome.exe"
    # Hide the "Chrome is being controlled by automated test software" notice.
    options.add_argument('--disable-infobars')
    # Kept from the original script, which notes it avoids some Chrome bugs.
    options.add_argument('--disable-gpu')
    # Chrome switches need the leading dashes; the switch name is --user-agent.
    options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0')

    # The options object has no effect unless it is handed to the driver.
    driver = webdriver.Chrome(options=options)
    try:
        driver.get("https://www.zhihu.com/")
        driver.maximize_window()
        # Long pause so the operator can complete the login manually.
        time.sleep(30)

        # Keyboard interaction: locate the search box and type the keyword.
        keyboard = driver.find_element(By.XPATH, '/html/body/div[1]/div/div[2]/header/div[1]/div[1]/div/form/div/div/label/input')
        keyboard.send_keys('西安旅游')
        # Press Enter to submit the search and navigate to the results.
        keyboard.send_keys(Keys.ENTER)
        time.sleep(3)

        # Mouse interaction: open the filter menu, then pick the
        # "articles only" entry.
        shaixuan1 = driver.find_element(By.XPATH, '/html/body/div[1]/div/main/div/div[1]/div/div/div')
        ActionChains(driver).click(shaixuan1).perform()
        shaixuan2 = driver.find_element(By.XPATH, '/html/body/div[1]/div/main/div/div[1]/div[2]/ul[1]/li[3]/div')
        ActionChains(driver).click(shaixuan2).perform()
        time.sleep(3)

        # Scroll down 500px every 500ms and stop after 7s, so that content
        # loaded on scroll gets a chance to appear.
        js = '''
let height = 0
let interval = setInterval(() => {
window.scrollTo({
top: height,
behavior: "smooth"
});
height += 500
}, 500);
setTimeout(() => {
clearInterval(interval)
}, 7000);
'''
        driver.execute_script(js)
        # Wait for the scrolling to finish and the loaded content to render.
        time.sleep(30)

        # Parse the fully rendered HTML with BeautifulSoup.
        bs = BeautifulSoup(driver.page_source, "html.parser")
    finally:
        # quit() ends the whole session (browser and chromedriver process),
        # even when a step above raised; close() only closes one window.
        driver.quit()

    # Example of extracting data from the parsed HTML (selector is a
    # placeholder and must be adapted to the target page):
    #     titles = [node.get_text() for node in bs.select(".goods-item__title")]
    #     for title in titles:
    #         print("%s\n" % title)
    return bs
想爬取知乎的文章，其他工作都做完了：网页能滚动，也能爬取动态加载的内容。但是发现第一步就不行——进入知乎之后内容加载不出来。
流程是：先进入知乎，用 time.sleep 等待一会儿以便手动登录，然后搜索问题；之后再跳转到下一个界面时，内容就加载不出来了。这是被反爬了吗？该怎么解决呢？
如果手动单独打开一个网页操作就没问题，但用 Python 自动化操作就不行！