有的语法更新了我改了下,然后现在貌似是点击下一页那里出错了,怎么改
"""
巨潮资讯网数据挖掘实战--获取套期保值公告:
1.搜索多个关键字
2.实现翻页功能
3.正则提取
4.数据清洗
5.存储到excel
"""
from selenium import webdriver
import time
import re
def tao_bao(keyword):
# 1.无界面浏览
# chrome_options = webdriver.ChromeOptions()
# chrome_options.add_argument('--headless')
# browser = webdriver.Chrome(options=chrome_options)
browser = webdriver.Chrome() # 模拟谷歌浏览器
browser.maximize_window() # 最大化页面
url='http://www.cninfo.com.cn/new/fulltextSearch?notautosubmit=&keyWord=' + keyword
browser.get(url)
time.sleep(3)
browser.find_element(By.XPATH,r' // *[ @ id = "calendar"] / div / span / div / div / input[1]').send_keys('2014-01-01')
browser.find_element(By.XPATH,r'// *[ @ id = "calendar"] / div / span / div / div / input[2]').send_keys('2020-01-01')
browser.find_element(By.XPATH,r'// *[ @ id = "calendar"] / button / i').click() # 点击搜索
time.sleep(2)
# 计算总页数
data = browser.page_source
p_count = '</div> <span class="total-box" style="">共 (.*?) 条 当前显示.*?条</span></div>'
count = re.findall(p_count, data)[0] # 获取总条数 class ="total-box" style="" > 共 23 条 当前显示21-23条 < / span >
pages = int(int(count) / 10) # 获取总页数
if pages>100:
pages=100
else:
pages=pages
# print(data)
# 2.自动翻页获取源代码
datas=[]
datas.append(data)
for i in range(pages):
browser.find_element(By.XPATH,r'//*[@id="fulltext-search"]/div/div[1]/div[2]/div[4]/div[2]/div/button[2]/i').click() # 点击下一页按钮
time.sleep(2)
data = browser.page_source
datas.append(data)
time.sleep(2)
alldata = "".join(datas) # 将列表转换为字符串
# browser.quit()
# 2.正则提取
p_title = '<a target="_blank".*?class="r-title">(.*?)</span>'
p_href = '<a target="_blank" href="(.*?)" data-id="'
p_shares = '<a target="_blank".*?data-seccode="(.*?)" class=' # 提取股票代码
p_date = '<a target="_blank" href=".*?;announcementTime=(.*?)" data-id="' # 提取发布日期
title = re.findall(p_title,alldata)
href = re.findall(p_href,alldata)
shares = re.findall(p_shares,alldata)
date = re.findall(p_date,alldata)
# print(title)
# print(len(title))
# print(href)
# print(len(href))
# print(shares)
# print(len(shares))
# print(date)
# print(len(date))
# 3.数据清洗
for i in range(len(title)):
title[i] = re.sub('<.*?>','',title[i])
href[i] = 'https://www.cnifo.com.cn' + href[i]
href[i] = re.sub('amp;','',href[i])
# print(str(i+1) + '.' + shares[i] +'-'+ title[i] + '-' + date[i])
# print(href[i])
# 4. 写进excel文件
file1 = open('/Users/hsx/Desktop/爬虫/套期保值公告爬取.csv', 'a') #
file1.write(keyword + '公告completed' + '\n' + '\n')
for i in range(len(title)):
file1.write(str(i+1) + '/' + shares[i] +'/'+ title[i] + '/' + date[i] + href[i])
file1.write('----------' + '\n')
file1.close()
# 5.函数定义及调用
keywords = ['套保','套期保值']
for i in keywords:
tao_bao(i)vv
报错是这样的
NoSuchElementException Traceback (most recent call last)
Input In [18], in <cell line: 89>()
88 keywords = ['向特定对象发行股票']
89 for i in keywords:
---> 90 tao_bao(i)
Input In [18], in tao_bao(keyword)
43 datas.append(data)
44 for i in range(pages):
---> 45 browser.find_element(By.XPATH,r'//*[@id="fulltext-search"]/div/div[1]/div[2]/div[4]/div[2]/div/button[2]/i').click() # 点击下一页按钮
46 time.sleep(2)
47 data = browser.page_source
File D:\anaconda3\lib\site-packages\selenium\webdriver\remote\webdriver.py:861, in WebDriver.find_element(self, by, value)
858 by = By.CSS_SELECTOR
859 value = '[name="%s"]' % value
--> 861 return self.execute(Command.FIND_ELEMENT, {"using": by, "value": value})["value"]
File D:\anaconda3\lib\site-packages\selenium\webdriver\remote\webdriver.py:444, in WebDriver.execute(self, driver_command, params)
442 response = self.command_executor.execute(driver_command, params)
443 if response:
--> 444 self.error_handler.check_response(response)
445 response["value"] = self._unwrap_value(response.get("value", None))
446 return response
File D:\anaconda3\lib\site-packages\selenium\webdriver\remote\errorhandler.py:249, in ErrorHandler.check_response(self, response)
247 alert_text = value["alert"].get("text")
248 raise exception_class(message, screen, stacktrace, alert_text) # type: ignore[call-arg] # mypy is not smart enough here
--> 249 raise exception_class(message, screen, stacktrace)
NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//*[@id="fulltext-search"]/div/div[1]/div[2]/div[4]/div[2]/div/button[2]/i"}
(Session info: chrome=108.0.5359.125)
Stacktrace:
Backtrace:
(No symbol) [0x0041E563]
(No symbol) [0x003A7FC1]
(No symbol) [0x0029D04D]
(No symbol) [0x002CC0B0]
(No symbol) [0x002CC22B]
(No symbol) [0x002FE612]
(No symbol) [0x002E85D4]
(No symbol) [0x002FC9EB]
(No symbol) [0x002E8386]
(No symbol) [0x002C163C]
(No symbol) [0x002C269D]
GetHandleVerifier [0x006B9B82+2658722]
GetHandleVerifier [0x006ACB84+2605476]
GetHandleVerifier [0x004C825A+620666]
GetHandleVerifier [0x004C6E80+615584]
(No symbol) [0x003B05EC]
(No symbol) [0x003B5958]
(No symbol) [0x003B5A45]
(No symbol) [0x003C050B]
BaseThreadInitThunk [0x75D16739+25]
RtlGetFullPathName_UEx [0x77428AFF+1215]
RtlGetFullPathName_UEx [0x77428ACD+1165]