本人 Python 新手，想用 BeautifulSoup 爬取网页内容（比如东方财富网的沪深港通资金流向 http://data.eastmoney.com/hsgt/index.html）。自己通过浏览器 F12 能找到关键的每日 top10 股票，但是用 BeautifulSoup 爬取后，却缺失这一部分内容。请问是什么原因？我试过很多网站，都会缺少我最想要的关键内容，请问怎么解决？
def askURL(url):
    """Fetch ``url`` and return the response body decoded as UTF-8.

    The request is sent with a desktop-Chrome ``User-Agent`` header.

    Args:
        url: Address to request.

    Returns:
        The decoded response body. If ``urllib.error.URLError`` is raised,
        the error's ``code`` and/or ``reason`` (whichever attributes exist)
        are printed and an empty string is returned.
    """
    head = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.182 Safari/537.36"}
    request = urllib.request.Request(url, headers=head)
    html = ''
    try:
        # The context manager closes the response even when read() or
        # decode() raises, so the underlying connection is not leaked.
        with urllib.request.urlopen(request) as response:
            html = response.read().decode('utf-8')
    except urllib.error.URLError as e:
        if hasattr(e, "code"):
            print(e.code)
        if hasattr(e, "reason"):
            print(e.reason)
    return html
def getData(baseurl):
    """Download the listing page(s) under ``baseurl`` and extract one row per item.

    Each page URL is ``baseurl`` followed by the page number; the loop
    currently covers page 1 only. Every ``<div class="item">`` element on a
    page is converted to its HTML string and searched with the patterns
    ``findLink``, ``findNumber`` and ``findPic``, which are defined outside
    this function.

    Only markup present in the HTML returned by ``askURL`` can be matched
    here; content that a page inserts afterwards with JavaScript is not part
    of that HTML and therefore will not be found.

    Args:
        baseurl: URL prefix to which the page number is appended.

    Returns:
        A list of rows, each ``[link, number, date, pic]``, holding the first
        ``findLink`` match, the first and second ``findNumber`` matches, and
        the first ``findPic`` match for one item.

    Raises:
        IndexError: If an item does not contain the expected matches.
    """
    # Results are collected in a local list so the function does not depend
    # on a pre-existing ``datalist`` name.
    datalist = []
    for i in range(1, 2):
        url = baseurl + str(i)
        html = askURL(url)
        soup = BeautifulSoup(html, "html.parser")
        for item in soup.find_all('div', class_="item"):
            item_html = str(item)
            link = re.findall(findLink, item_html)[0]
            # Run the number pattern once; its first two matches are used
            # as the number and the date respectively.
            numbers = re.findall(findNumber, item_html)
            number = numbers[0]
            date = numbers[1]
            pic = re.findall(findPic, item_html)[0]
            datalist.append([link, number, date, pic])
    return datalist