import requests
from lxml import etree
from bs4 import BeautifulSoup
import time
import pandas as pd
proxies = {
'https': 'http://127.0.0.1:10818',
'http': 'http://127.0.0.1:10818'
}
hd = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36",
"Accept": "application/json",
"Cache-Control": "no-cache"
}
resp = requests.get('https://awesomepolygon.com/',headers=hd)
print(resp.status_code)
html = BeautifulSoup(resp.text,"lxml")
temp = html.find_all("div", class_="col-lg-4")
info=[]
for i in temp:
name = i.find("h2", class_="crd-title").text.strip()
short_des = i.find("p").get_text()
url = i.find("a").get('href').strip()
info.append([name,short_des,url])
返回:
200
Traceback (most recent call last):
File "/Users/paul/PycharmProjects/pythonProject/official/maticproject.py", line 22, in
short_des = i.find("p").get_text()
AttributeError: 'NoneType' object has no attribute 'get_text'