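# NOTE: The page source fetched by the crawler differs from the source shown
# when the same page is opened directly in a browser.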
import re
import requests
from bs4 import BeautifulSoup
import webbrowser
def Get_web(url, timeout=10):
    """Fetch the HTML document at *url* and return it as text.

    NOTE(review): the original author observed that the HTML obtained here
    differs from what the browser's "view source" shows for the same page.
    Presumably the site serves different content to non-browser clients or
    builds the page with JavaScript -- not confirmed from this file.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        The decoded response body, or an empty string when the request
        fails (connection problem, timeout, or HTTP error status).
    """
    # Bound up front so the final ``return`` is always valid, even when the
    # request raises before the body has been read.
    web_info = ""
    try:
        r = requests.get(
            url,
            headers={'user-agent': 'Mozilla/5.0'},
            timeout=timeout,
        )
        print("!")  # progress marker: a response was received
        # Raises requests.HTTPError for 4xx/5xx; returns None otherwise, so
        # there is nothing useful to print from it.
        r.raise_for_status()
        # Use the encoding detected from the body rather than the header.
        r.encoding = r.apparent_encoding
        web_info = r.text
        print(web_info)
    except requests.RequestException as err:
        # Best-effort fetch: report the failure and fall through to return
        # the empty fallback instead of crashing the caller.
        print("error", err)
    return web_info
def Process_text(web_info):
    """Parse *web_info* as HTML and print every ``<script>`` element in it.

    Extraction of actual data is not implemented yet, so the returned list
    is always empty.

    Args:
        web_info: HTML text to parse.

    Returns:
        A new, empty list.
    """
    script_tags = BeautifulSoup(web_info, "html.parser").find_all("script")
    print(script_tags)
    return []
def Print_text(Processed_text):
    """Placeholder for printing the processed results; currently a no-op.

    Args:
        Processed_text: The processed data to display (ignored for now).

    Returns:
        None.
    """
    return None
def main(item="螺狮粉"):
    """Open the Taobao search page for *item* and fetch/process its HTML.

    For each iteration the search URL is opened in the default web browser
    (so the browser-rendered page can be compared with the fetched HTML),
    downloaded with ``Get_web`` and handed to ``Process_text``.

    Args:
        item: Search keyword placed in the ``q`` query parameter.
    """
    # Local import keeps this block self-contained; percent-encoding the
    # keyword keeps the URL valid for terms containing spaces, '&', etc.
    from urllib.parse import quote

    query = quote(item)
    # NOTE(review): the loop runs exactly once and ``num`` is not used; it
    # presumably is meant to drive the ``s`` result offset for pagination.
    # ``s`` is left at 0 as in the original until the site's paging step is
    # confirmed.
    for num in range(0, 1):
        url = "https://s.taobao.com/search?q={}&s=0".format(query)
        webbrowser.open(url, new=0, autoraise=True)
        web_info = Get_web(url)
        Process_text(web_info)
# Run the scraper only when executed as a script, so importing this module
# does not open a browser or hit the network as a side effect.
if __name__ == "__main__":
    main()