import requests
from bs4 import BeautifulSoup
import bs4
def getHTMLText(url):
    """Download *url* and return the response body as text.

    The response encoding is replaced by ``apparent_encoding`` before the
    body is decoded.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text, or the sentinel string ``'产生错误'`` when the
        request fails (connection problem, timeout, or an HTTP error status
        reported by ``raise_for_status``).
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request-level failures are mapped to the sentinel; anything
        # else (e.g. KeyboardInterrupt or a programming error) propagates
        # instead of being silently hidden by a bare ``except``.
        return '产生错误'
def fillUnivList(ulist, html):
    """Parse *html* and append one three-item row per table row to *ulist*.

    Each appended row is ``[tds[0].string, tds[1].string, tds[2].string]``
    taken from the ``<td>`` cells of a tag found directly under the first
    ``<tbody>`` element.

    Args:
        ulist: List that is extended in place with the extracted rows.
        html: HTML document text to parse.

    Returns:
        None. If the document has no ``<tbody>`` nothing is appended.
    """
    soup = BeautifulSoup(html, "html.parser")
    tbody = soup.find('tbody')
    if tbody is None:
        # No ranking table (e.g. an error page or a changed layout):
        # leave ulist untouched instead of failing on ``None.children``.
        return
    for tr in tbody.children:
        # ``children`` also yields bare text nodes (whitespace between rows).
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr('td')  # calling a Tag is shorthand for find_all
        if len(tds) < 3:
            # Header or malformed row without the three expected cells.
            continue
        ulist.append([tds[0].string, tds[1].string, tds[2].string])
def printUnivList(ulist, name):
    """Print a header line, then every row of *ulist* whose third field is *name*.

    Columns are centred in a width of 10 and separated by tabs. The middle
    column is padded with ``chr(12288)`` (U+3000, the full-width space)
    instead of an ASCII space.

    Args:
        ulist: Sequence of rows; each row is indexable with at least three
            items (``u[0]``, ``u[1]``, ``u[2]``).
        name: Value compared with ``u[2]`` to select the rows to print.

    Returns:
        None. Output goes to standard output.
    """
    tplt = "{0:^10}\t{1:{3}^10}\t{2:^10}"
    fill = chr(12288)
    print(tplt.format('排名', '学校名称', '省市', fill))
    for u in ulist:
        if u[2] != name:
            continue
        # A cell may be None; None does not accept the '^10' format spec,
        # so normalise every cell to text before formatting.
        rank, school, region = ("" if v is None else str(v) for v in u[:3])
        print(tplt.format(rank, school, region, fill))
def main():
    """Fetch the ranking page, read a filter value from stdin, print matches.

    Steps: download the page with ``getHTMLText``, extract rows with
    ``fillUnivList``, read one line from standard input, and pass it to
    ``printUnivList`` as the value to match against each row's third field.
    """
    uinfo = []
    url = "http://www.zuihaodaxue.cn/zuihaodaxuepaiming2019.html"
    html = getHTMLText(url)
    if html == '产生错误':
        # getHTMLText returns this sentinel on failure; parsing it as HTML
        # would only fail later with a confusing error, so report and stop.
        print(html)
        return
    fillUnivList(uinfo, html)
    name = input().strip()
    printUnivList(uinfo, name)


# Run only when executed as a script, so importing this module does not
# trigger network access or block on input().
if __name__ == "__main__":
    main()
以上代码用于爬取最好大学排名,在 Spyder 中运行时显示以下错误:
File "D:\mydata\anoconda\lib\site-packages\bs4\__init__.py", line 283, in __init__
self.builder.initialize_soup(self)
AttributeError: 'HTMLParserTreeBuilder' object has no attribute 'initialize_soup'
我打开 _htmlparser.py,发现 HTMLParserTreeBuilder 确实没有 'initialize_soup' 属性,但不知道该怎么修改,也不确定是否应该在这个文件里改,所以不敢下手。还望大神相助,指点一下,要不作业都完成不了啦▽〒