# Python code
from pony.orm import *
db = Database()
class Tm(db.Entity):
    """One scraped problem, persisted through Pony ORM.

    The field meanings below follow how the scraping loop in this file
    populates them. Every field except ``id`` is optional.
    """

    id = PrimaryKey(int, auto=True)  # auto-generated primary key
    bh = Optional(str)     # problem label/number taken from the list page
    bt = Optional(str)     # problem title
    fs = Optional(int)     # score of the problem
    tgs = Optional(int)    # number of accepted submissions
    tjs = Optional(int)    # number of submissions
    tgl = Optional(str)    # pass rate, stored as the text shown on the page
    tmnl = Optional(str)   # problem statement text ("无" when none was found)
    # Full page source of the problem's detail page.
    # NOTE(review): page sources can be very large; confirm the provider puts
    # no length limit on plain ``str`` columns (Pony offers LongStr for big text).
    html = Optional(str)
    tx = Optional(str)     # problem type: "函数题" or "编程题"
    xqylj = Optional(str)  # URL of the problem's detail page
# Bind the entity definitions to a local SQLite file. create_db=True lets the
# first run create the file; without it Pony raises when the file is missing,
# which contradicts the create_tables=True bootstrap on the next line.
db.bind(provider='sqlite', filename='e:/python/Tm.sqlite', create_db=True)
# Map entities to tables, creating any table that does not exist yet.
db.generate_mapping(create_tables=True)
# Echo every SQL statement Pony issues from here on.
set_sql_debug(True)
import time
from selenium import webdriver
# Problem-list page that the scrape starts from.
url = 'https://pintia.cn/problem-sets/1111652100718116864/problems/type/7'

# Browser used for the list page; later code keeps using `driver` to switch
# tabs, so the name must stay as is.
driver = webdriver.Chrome()
driver.get(url)

# Fixed pause so the page can render before its source is captured.
time.sleep(1)

# Snapshot of the list page; the row regex is run against this string.
html = driver.page_source
import re
# Matches one row of the problem-list table. Groups:
#   1 = problem label, 2 = relative link to the problem, 3 = title,
#   4 = score, 5 = accepted count, 6 = submission count, 7 = pass rate.
reobj = re.compile(r'<tr><td class="answerIcon_1du7d"></td><td>([\d\D]*?)</td><td><a href="([\d\D]*?)" class="">([\d\D]*?)</a></td><td>([\d\D]*?)</td><td>([\d\D]*?)</td><td>([\d\D]*?)</td><td>([\d\D]*?)</td></tr>')

# Matches the first paragraph of the problem statement on a detail page.
# Compiled once here instead of once per problem.
reobj2 = re.compile(r'<div class="problem_Yd8rq p-3 mb-3 background-grey-1"><div class="ques-view"><p>([\d\D]*?)</p>')

try:
    # Pony requires an active db_session for entity creation in scripts.
    with db_session:
        # Two passes: pass 0 scrapes the page loaded initially, pass 1 scrapes
        # the page shown after switching to the function-problem tab.
        for i in range(2):
            for match in reobj.finditer(html):
                bhs = match.group(1)  # problem label
                # A label containing "函数" marks a function problem;
                # anything else is treated as a programming problem.
                txs = "函数题" if "函数" in bhs else "编程题"
                bts = match.group(3)   # title
                fss = match.group(4)   # score
                tgss = match.group(5)  # accepted count
                tjss = match.group(6)  # submission count
                tgls = match.group(7)  # pass rate
                url2 = "http://pintia.cn" + match.group(2)  # detail page URL

                # Fetch the detail page in its own browser and always close
                # it, even when loading the page fails.
                driver2 = webdriver.Chrome()
                try:
                    driver2.get(url2)
                    time.sleep(3)  # give the detail page time to render
                    htmls = driver2.page_source
                finally:
                    driver2.quit()

                # Statement text; "无" when the page has no matching block.
                # If several blocks match, the last one wins.
                tmnls = "无"
                for match2 in reobj2.finditer(htmls):
                    tmnls = match2.group(1)

                Tm(bh=bhs, bt=bts, fs=fss, tgs=tgss, tjs=tjss, tgl=tgls,
                   tmnl=tmnls, html=htmls, tx=txs, xqylj=url2)
                time.sleep(3)  # pause between problems
                db.commit()    # persist each problem as soon as it is scraped

            if i == 0:
                # Click the function-problem tab. find_element("xpath", ...)
                # is used because find_element_by_xpath no longer exists in
                # current Selenium releases.
                driver.find_element("xpath", "//div[@id='sparkling-daydream']/div[3]/div[3]/div/ul/li/a/div/div[2]").click()
                # Same pause as after the initial load, so the new list is
                # rendered before its source is read.
                time.sleep(1)
                html = driver.page_source
finally:
    # Close the list-page browser when scraping ends or fails.
    driver.quit()