from bs4 import BeautifulSoup
import requests
import openpyxl
def get_html(url):
try:
r=requests.get(url)
r.raise_for_status()
r.encoding=r.apparent_encoding
return r.text
except:
r="fail"
return r
def get_contents(ulist,rurl):
soup=BeautifulSoup(rurl,'html.parser')
trs=soup.find_all('tr')
for tr in trs:
ui=[]
for td in tr:
ts=td.string
if ts =='':
continue
ts=ts.strip()
ui.append(ts)
ulist.append(ui)
def saveList(ulist):
wb=openpyxl.Workbook()
ws=wb.active
r=1
for line in ulist:
for col in range(1,len(line)+1):
ws.cell(row=r,column=col).value=line[col-1]
r=r+1
wb.save("d:\\work.xlsx")
if __name__ =="__main__":
urli=[]
url="https://gdp.gotohui.com"
rs=get_html(url)
get_contents(urli,rs)
saveList(urli)
报错
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-17-6fe05a5014a7> in <module>
36 url="https://gdp.gotohui.com"
37 rs=get_html(url)
---> 38 get_contents(urli,rs)
39 saveList(urli)
<ipython-input-17-6fe05a5014a7> in get_contents(ulist, rurl)
20 if ts =='':
21 continue
---> 22 ts=ts.strip()
23 ui.append(ts)
24 ulist.append(ui)
AttributeError: 'NoneType' object has no attribute 'strip'