1、爬虫返回的response内容完整,但是用etree.HTML解析后,内容就变少了,导致不能用xpath定位,是为啥?
import requests
from lxml import etree
url = "https://tieba.baidu.com/f?fr=wwwt&kw=%E4%B8%8D%E8%89%AF%E4%BA%BA"
headers = {
"User-Agent":"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36"
}
response = requests.get(url,headers=headers).content.decode()
print(response)
html_str = etree.HTML(response)
print(etree.tostring(html_str).decode())
# li = html_str.xpath("//ul[@id='thread_list']/li[@class='j_thread_list clearfix']")
# print(li)