import re
from bs4 import BeautifulSoup as B
from url_manager import UrlManger as U
import requests
import os
# Seed page the crawl starts from.
url = "http://www.crazyant.net"

# Project-local URL manager; this script relies on its add_url(), get_url()
# and new_urls members (presumably new_urls is the collection of URLs that
# have not been fetched yet -- confirm against url_manager).
u_1 = U.UrlManger()
u_1.add_url(url)

# Keep fetching for as long as the manager reports pending URLs.
while u_1.new_urls:
    url_1 = u_1.get_url()
    try:
        response = requests.get(url_1, timeout=3)
    except requests.RequestException as exc:
        # Hrefs are queued exactly as they appear in the page, so an entry
        # may be relative, malformed, or unreachable. Report it and move on
        # instead of letting one bad link abort the whole crawl.
        print("skipping", url_1, exc)
        continue

    # Decode the body as UTF-8 regardless of what the server declared.
    response.encoding = "utf-8"
    html = response.text
    soup = B(html, "html.parser")

    # href=True selects only <a> tags that actually carry an href attribute.
    # Anchors without one (e.g. <a name="top">) made anchor["href"] raise
    # KeyError: 'href' and crash the script.
    for anchor in soup.find_all("a", href=True):
        href = anchor["href"]
        u_1.add_url(href)
        print(href, anchor.get_text())
# Traceback (most recent call last):
#   File "D:\python文件储存\每日一练\第12练.py", line 18, in <module>
#     u_1.add_url(soup_2["href"])
#   File "D:\python\lib\site-packages\bs4\element.py", line 1519, in __getitem__
#     return self.attrs[key]
# KeyError: 'href'