I want to use Selenium and XPath to grab the content of each news item in the Baidu News (资讯) results for the query "中招体育考试", but the title/source XPath queries keep returning empty lists and I can't figure out where the problem is.
I'm a beginner, any pointers would be appreciated.
from lxml import etree
from time import sleep
from selenium.webdriver import ChromeOptions
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
option = ChromeOptions()
option.add_experimental_option('excludeSwitches',['enable-automation'])
s = Service(executable_path=r'C:\Users\86198\Desktop\python基础\chromedriver.exe')
brs = webdriver.Chrome(service=s, options=option)  # pass the options object, otherwise the excludeSwitches setting above has no effect
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36'}  # note: this dict is never used below; Selenium does not take request headers this way
brs.get('https://www.baidu.com/')
sleep(1)
brs.find_element(By.XPATH,r'//*[@id="kw"]').send_keys('中招体育考试')
brs.find_element(By.XPATH,r'//*[@id="su"]').click()
sleep(2)
brs.find_element(By.XPATH,r'//*[@id="s_tab"]/div/a[1]').click() # 点击资讯
brs.find_element(By.XPATH,r'//*[@id="header_top_bar"]/div[2]/span').click()
brs.find_element(By.XPATH,r'//*[@id="c-tips-container"]/div[1]/div/div/ul[2]/li/a').click() # 点击按时间排序
brs.find_element(By.XPATH,r'//*[@id="header_top_bar"]/div[1]/div/span').click()
brs.find_element(By.XPATH,r'//*[@id="c-tips-container"]/div[2]/div/div/ul/li[3]/a').click() # 点百家号
sleep(2)
labels = brs.find_elements(By.XPATH,r'//div[@id="content_left"]/div[@class="result-op c-container xpath-log new-pmd"]')
length = len(labels)
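# Note: if `labels` itself comes back empty, the class string on Baidu's result divs
# may simply have changed; a looser match such as
#   //div[@id="content_left"]//div[contains(@class, "result-op")]
# might be worth trying (just a guess, not verified against the current page markup).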
for i in range(length):
    # click the headline link of the i-th result
    labels[i].find_element(By.XPATH,r'.//h3/a').click()
    sleep(5)
    # parse whatever page the driver is currently showing
    text = brs.page_source
    html = etree.HTML(text)
    # XPaths aimed at the title and source of the Baijiahao article page
    title = html.xpath('//*[@id="ssr-content"]/div[2]/div/div[1]/div[1]/div/div[1]/text()')
    source = html.xpath('//*[@id="ssr-content"]/div[2]/div/div[1]/div[1]/div/div[2]/div[2]/a/p/text()')
    print(title, source)
    sleep(5)
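What I suspect (but have not confirmed): clicking a result opens the article in a new browser tab, while brs keeps pointing at the search-results tab, so brs.page_source is still the results page and the //*[@id="ssr-content"]/... XPaths find nothing there. Below is a rough sketch of the loop with window-handle switching; it assumes the links really do open a new tab, and the article title/source XPaths are the same guesses as above, so they may still need adjusting.

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

results_handle = brs.current_window_handle
for label in labels:
    label.find_element(By.XPATH, r'.//h3/a').click()
    # wait for the article tab to appear, then point the driver at it
    WebDriverWait(brs, 10).until(EC.number_of_windows_to_be(2))
    new_handle = [h for h in brs.window_handles if h != results_handle][0]
    brs.switch_to.window(new_handle)
    sleep(3)  # crude wait for the article to render; an explicit wait on a known element would be better
    html = etree.HTML(brs.page_source)
    title = html.xpath('//*[@id="ssr-content"]/div[2]/div/div[1]/div[1]/div/div[1]/text()')
    source = html.xpath('//*[@id="ssr-content"]/div[2]/div/div[1]/div[1]/div/div[2]/div[2]/a/p/text()')
    print(title, source)
    # close the article tab and return to the results tab before the next click
    brs.close()
    brs.switch_to.window(results_handle)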