import time

import xlwt as xlwt
from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    WebDriverException,
)
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
# Launch Chrome through an explicitly located chromedriver binary.
service = Service(r"D:\chromedriver.exe")
driver = webdriver.Chrome(service=service)
try:
    # Register a script that redefines navigator.webdriver to read as
    # undefined in every newly created document (presumably so the site
    # does not flag the session as automated).
    driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
        "source": """
Object.defineProperty(navigator, 'webdriver', {
get: () => undefined
})
"""
    })
    driver.get('https://kns.cnki.net/kns8/defaultresult/index')
    # Fixed pause before looking up the search box.
    time.sleep(1)
    # Type the query into the search box, then submit it.
    driver.find_element(By.ID, "txt_search").send_keys('福建妇女')
    driver.find_element(By.CLASS_NAME, "search-btn").click()
    # Fixed pause after submitting, before the results are read.
    time.sleep(1)
except BaseException:
    # The browser is already running at this point; shut it down before
    # propagating the failure (including Ctrl-C) so no Chrome/chromedriver
    # process is left behind.
    driver.quit()
    raise
# One [order, title, href, author, source, pubtime, category] entry is
# appended per result row, across every page visited.
datalist = []
try:
    while True:
        trs = driver.find_elements(
            By.XPATH, "//table[@class='result-table-list']/tbody/tr"
        )
        for tr in trs:
            # Sequence number
            order = tr.find_element(By.XPATH, "./td[@class='seq']").text
            print(order)
            # Title
            title = tr.find_element(By.XPATH, "./td[@class='name']").text
            print(title)
            # Link to the paper
            href = tr.find_element(
                By.XPATH, "./td[@class='name']/a"
            ).get_attribute('href')
            print(href)
            # Author(s)
            author = tr.find_element(By.XPATH, "./td[@class='author']").text
            print(author)
            # Source
            source = tr.find_element(By.XPATH, "./td[@class='source']").text
            print(source)
            # Publication date
            pubtime = tr.find_element(By.XPATH, "./td[@class='date']").text
            print(pubtime)
            # Publication category
            category = tr.find_element(By.XPATH, "./td[@class='data']").text
            print(category)
            datalist.append(
                [order, title, href, author, source, pubtime, category]
            )
        # Move on to the next page; stop when that is not possible.
        try:
            driver.find_element(By.ID, "PageNext").click()
            time.sleep(2)
            print(driver.page_source)
        except NoSuchElementException:
            # No "PageNext" element: the last page has been processed.
            print("没有下一页了")
            break
        except WebDriverException as exc:
            # Any other browser-side failure also ends pagination so the
            # rows gathered so far are kept, but the real cause is reported
            # instead of being mistaken for the end of the results.
            print(f"翻页失败，停止抓取：{exc}")
            break
finally:
    # Always release the browser, even when scraping a row raises.
    driver.quit()
# Dump the collected rows, then write them to an .xls workbook with a
# header row followed by one spreadsheet row per record.
print("saving")
print(datalist)
workbook = xlwt.Workbook(encoding="utf-8")
worksheet = workbook.add_sheet('sheet1', cell_overwrite_ok=True)
col = ("序号", "题目", "链接", "作者", "来源", "发表时间", "类别")
# Header cells go in row 0.
for col_idx, heading in enumerate(col):
    worksheet.write(0, col_idx, heading)
# Data starts at row 1; each record supplies one value per header column.
for row_idx, record in enumerate(datalist, start=1):
    for col_idx in range(len(col)):
        worksheet.write(row_idx, col_idx, record[col_idx])
workbook.save("zhiwang2.xls")