import re
from urllib.request import urlopen
from bs4 import BeautifulSoup
# Download the qidian.com "yuepiao" ranking page for 2022-08 (per the URL)
# and parse it. The response is used as a context manager so the connection
# is closed as soon as BeautifulSoup has read the markup.
with urlopen('https://www.qidian.com/rank/yuepiao/year2022-month08/') as html:
    obj = BeautifulSoup(html, 'lxml')

# Section 1: the first element whose class is "list_type_detective".
# Iterating a Tag yields its direct children. find() returns None when
# nothing matches, so guard against that instead of raising TypeError.
ranking = obj.find(attrs={'class': 'list_type_detective'})
if ranking is not None:
    for child in ranking:
        print(child)
print('*'*8)  # separator between sections

# Section 2: href of every <a> that is a direct child of an <li> that is a
# direct child of <ul class="list_type_detective">.
for link in obj.select('ul.list_type_detective > li > a'):
    print(link['href'])
print('-'*8)  # separator between sections

# Section 3: every <a> tag whose href starts with "//www".
for anchor in obj.find_all('a', href=re.compile('^//www')):
    print(anchor)
# See also: https://baijiahao.baidu.com/s?id=1703234898700447231&wfr=spider&for=pc